diff --git a/Directory.Packages.props b/Directory.Packages.props index 7ef35e13b..87f4bd481 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -48,7 +48,8 @@ - + + diff --git a/PLAN-rules-config.md b/PLAN-rules-config.md deleted file mode 100644 index 94fc33bbd..000000000 --- a/PLAN-rules-config.md +++ /dev/null @@ -1,194 +0,0 @@ -# Improved Rules Configuration Format - -## Context - -The `block` section in `changelog.yml` is being redesigned and renamed to `rules:`. Goals: -1. Explicit matching semantics (`any` vs `all`) -2. Per-field include/exclude modes for types and areas -3. Product overrides nested under the section they affect -4. Clear, scannable log messages prefixed with `[+include]` / `[-exclude]` -5. No backward compat — error if old `block:` key is seen - -## YAML Format - -```yaml -rules: - # Global match default for multi-valued fields (labels, areas). - # any (default) = match if ANY item matches the list - # all = match only if ALL items match the list - # Inherited by create, publish, and all product overrides. - # match: any - - # Create — controls which PRs generate changelog entries. - # exclude: block PRs with these labels (comma-separated) - # include: only create changelogs for PRs with these labels - # Cannot specify both. - # - # create: - # exclude: ">non-issue, >test" - # # match: any - # products: - # 'elasticsearch, kibana': - # exclude: ">test" - # 'cloud-serverless': - # exclude: "ILM" - - # Publish — controls which entries appear in rendered output. - # exclude_types / include_types - # exclude_areas / include_areas - # Cannot mix exclude_ and include_ for the same field. - # - # match_areas inherits from rules.match if not specified. 
- # - # publish: - # # match_areas: any - # exclude_types: - # - deprecation - # - known-issue - # exclude_areas: - # - "Internal" - # products: - # 'elasticsearch, kibana': - # exclude_types: - # - docs - # 'cloud-serverless': - # # match_areas: any - # include_areas: - # - "Search" - # - "Monitoring" -``` - -### Match inheritance - -``` -rules.match (global default, "any" if omitted) - ├─ create.match → create.products.{id}.match - └─ publish.match_areas → publish.products.{id}.match_areas -``` - -### Area matching examples - -| Config | Entry areas: `["Search", "Internal"]` | Result | -|--------|--------------------------------------|--------| -| `exclude_areas: [Internal]`, match `any` | "Internal" matches | **Blocked** | -| `exclude_areas: [Internal]`, match `all` | Not all match | **Allowed** | -| `include_areas: [Search]`, match `any` | "Search" matches | **Allowed** | -| `include_areas: [Search]`, match `all` | "Internal" not in list | **Blocked** | - -## Error Messages - -### Validation (config parsing) - -| Condition | Message | -|-----------|---------| -| Old `block:` key found | `'block' is no longer supported. Rename to 'rules'. See changelog.example.yml.` | -| Both `exclude_types` + `include_types` | `rules.publish: cannot have both 'exclude_types' and 'include_types'. Use one or the other.` | -| Both `exclude_areas` + `include_areas` | Same pattern | -| Both `create.exclude` + `create.include` | `rules.create: cannot have both 'exclude' and 'include'. Use one or the other.` | -| Invalid match value | `rules.match: '{value}' is not valid. Use 'any' or 'all'.` | -| Empty list | `rules.publish.exclude_types: list is empty. Add types or remove the field.` | -| Unknown product | `rules.publish.products: '{id}' not in available products. 
Available: {list}` | - -### Runtime (create/publish time) - -Prefixed with `[-exclude]` or `[+include]` for scanning: - -**Create:** -- `[-exclude] PR #{n}: skipped, label '{label}' matches rules.create.exclude (match: {mode})` -- `[+include] PR #{n}: created, label '{label}' matches rules.create.include (match: {mode})` -- `[+include] PR #{n}: skipped, no labels match rules.create.include [{labels}] (match: {mode})` -- Product: `[-exclude] PR #{n} ({product}): skipped, label '{label}' matches rules.create.products.{product}.exclude` - -**Publish:** -- `[-exclude] PR #{n}: hidden, type '{type}' in rules.publish.exclude_types` -- `[+include] PR #{n}: hidden, type '{type}' not in rules.publish.include_types` -- `[-exclude] PR #{n}: hidden, area '{area}' in rules.publish.exclude_areas (match_areas: {mode})` -- `[-exclude] PR #{n}: hidden, all areas [{areas}] in rules.publish.exclude_areas (match_areas: all)` -- `[+include] PR #{n}: hidden, areas [{areas}] not in rules.publish.include_areas (match_areas: {mode})` -- Product: same patterns with `rules.publish.products.{product}.` prefix - -## Files to Modify - -### 1. Domain model — enums and PublishBlocker -**`src/Elastic.Documentation/ReleaseNotes/PublishBlocker.cs`** - -- Add `MatchMode` enum (`Any`, `All`) -- Add `FieldMode` enum (`Exclude`, `Include`) -- Add to `PublishBlocker`: `MatchAreas` (MatchMode), `TypesMode` (FieldMode), `AreasMode` (FieldMode) - -### 2. Domain model — rename and restructure BlockConfiguration -**`src/Elastic.Documentation.Configuration/Changelog/BlockConfiguration.cs`** - -Rename to `RulesConfiguration` (or new file). Structure: -- `RulesConfiguration`: `Match` (MatchMode), `Create` (CreateRules?), `Publish` (PublishRules?) -- `CreateRules`: `Labels` (list), `Mode` (FieldMode), `Match` (MatchMode?), `ByProduct` (dict) -- `PublishRules`: `PublishBlocker` fields + `ByProduct` (dict of product-specific `PublishBlocker`s) -- Delete old `ProductBlockers` record - -### 3. 
Core blocking logic -**`src/Elastic.Documentation/ReleaseNotes/PublishBlockerExtensions.cs`** - -- `MatchesType()`: type vs list -- `MatchesArea()`: any/all matching -- `ShouldBlock()`: per-field mode (`Exclude` + match → blocked; `Include` + no match → blocked) - -### 4. YAML DTO (CLI path) -**`src/services/Elastic.Changelog/Serialization/ChangelogConfigurationYaml.cs`** - -- Rename `BlockConfigurationYaml` → `RulesConfigurationYaml` -- New `CreateRulesYaml`: `Exclude`/`Include` (string), `Match` (string?), `Products` (dict) -- Update `PublishBlockerYaml`: `MatchAreas`, `ExcludeTypes`/`IncludeTypes`, `ExcludeAreas`/`IncludeAreas`, `Products` (dict) -- Remove old fields (`Types`, `Areas`, `Create` string, root `Product`) -- Update parent `ChangelogConfigurationYaml`: rename `Block` → `Rules` - -### 5. YAML DTO (minimal/inline path) -**`src/Elastic.Documentation.Configuration/ReleaseNotes/ReleaseNotesSerialization.cs`** - -Mirror changes for minimal DTOs. Rename `BlockConfigMinimalDto` → `RulesConfigMinimalDto`, etc. - -### 6. Configuration parsing + validation -**`src/services/Elastic.Changelog/Configuration/ChangelogConfigurationLoader.cs`** - -- Detect old `block:` key → emit error -- Parse `rules:` with new structure -- Validate mutual exclusivity, match values, empty lists -- Resolve match inheritance chain - -### 7. Create blocking logic -Find where create labels are checked and update for include/exclude + match + runtime messages. - -### 8. Rendering utilities -**`src/services/Elastic.Changelog/Rendering/ChangelogRenderUtilities.cs`** - -- Update for new `publish.products` structure -- Add `[-exclude]` / `[+include]` prefixed runtime log messages - -### 9. Example config -**`config/changelog.example.yml`** — replace `block:` section with `rules:`. - -### 10. All references to BlockConfiguration -Find and update all code referencing `BlockConfiguration`, `Block`, `ProductBlockers` to use new names. - -### 11. 
Tests - -**Unit tests** (`PublishBlockerExtensionsTests.cs`): -- All mode/match combinations (exclude×any, exclude×all, include×any, include×all) -- Mixed modes (exclude_types + include_areas) -- Match inheritance (global → section → product) - -**Integration tests** (`BlockConfigurationTests.cs`): -- New format end-to-end -- Validation error messages (mutual exclusivity, invalid match, old `block:` key) -- Product overrides under publish.products and create.products -- Create include/exclude + match -- Runtime message prefixes `[-exclude]` / `[+include]` - -## Verification - -1. New unit tests for all mode/match combinations -2. Integration tests with new config format -3. Validation error tests — verify all error messages -4. Old `block:` key → error test -5. YAML parsing on both CLI and minimal paths -6. Runtime messages at create and publish time with correct prefixes -7. Match inheritance chain works correctly diff --git a/docs/cli/assembler/assembler-index.md b/docs/cli/assembler/assembler-index.md index 5d551e4b4..8ae72ddcd 100644 --- a/docs/cli/assembler/assembler-index.md +++ b/docs/cli/assembler/assembler-index.md @@ -29,9 +29,6 @@ docs-builder assembler index [options...] [-h|--help] [--version] `--password` `` : Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD (optional) -`--no-semantic` `` -: Index without semantic fields (optional) - `--search-num-threads` `` : The number of search threads the inference endpoint should use. Defaults: 8 (optional) diff --git a/docs/cli/docset/index-command.md b/docs/cli/docset/index-command.md index 32aa3a25b..00e28cf1c 100644 --- a/docs/cli/docset/index-command.md +++ b/docs/cli/docset/index-command.md @@ -25,9 +25,6 @@ docs-builder index [options...] 
[-h|--help] [--version] `--password` `` : Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD (optional) -`--no-semantic` `` -: Index without semantic fields (optional) - `--search-num-threads` `` : The number of search threads the inference endpoint should use. Defaults: 8 (optional) diff --git a/src/Elastic.Codex/Building/CodexBuildService.cs b/src/Elastic.Codex/Building/CodexBuildService.cs index 8db6350d2..a502ff52c 100644 --- a/src/Elastic.Codex/Building/CodexBuildService.cs +++ b/src/Elastic.Codex/Building/CodexBuildService.cs @@ -85,7 +85,7 @@ public async Task BuildAll( if (exporters is not null && buildContexts.Count > 0) { var firstContext = buildContexts[0].BuildContext; - sharedExporters = exporters.CreateMarkdownExporters(logFactory, firstContext, context.IndexNamespace).ToArray(); + sharedExporters = exporters.CreateMarkdownExporters(logFactory, firstContext, "codex").ToArray(); var startTasks = sharedExporters.Select(async e => await e.StartAsync(ctx)); await Task.WhenAll(startTasks); } diff --git a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs index 367fe844b..0e6ee09ce 100644 --- a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs +++ b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs @@ -9,6 +9,7 @@ namespace Elastic.Documentation.Configuration; public class DocumentationEndpoints { public required ElasticsearchEndpoint Elasticsearch { get; init; } + public string Namespace { get; set; } = "dev"; } public class ElasticsearchEndpoint @@ -25,9 +26,6 @@ public class ElasticsearchEndpoint public int IndexNumThreads { get; set; } = 4; // Reduced for Serverless rate limits public bool NoElasticInferenceService { get; set; } - // index options - public string IndexNamePrefix { get; set; } = "semantic-docs"; - // channel buffer options public int BufferSize { get; set; } = 50; // Reduced for 
Serverless rate limits public int MaxRetries { get; set; } = 5; // Increased for 429 retries @@ -43,7 +41,6 @@ public class ElasticsearchEndpoint public X509Certificate? Certificate { get; set; } public bool CertificateIsNotRoot { get; set; } public int? BootstrapTimeout { get; set; } - public bool NoSemantic { get; set; } public bool ForceReindex { get; set; } /// diff --git a/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs b/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs index e8031368c..4bd1586c1 100644 --- a/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs +++ b/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs @@ -21,7 +21,6 @@ public record ElasticsearchIndexOptions public string? Password { get; init; } // inference options - public bool? NoSemantic { get; init; } public bool? EnableAiEnrichment { get; init; } public int? SearchNumThreads { get; init; } public int? IndexNumThreads { get; init; } @@ -29,7 +28,6 @@ public record ElasticsearchIndexOptions public int? BootstrapTimeout { get; init; } // index options - public string? IndexNamePrefix { get; init; } public bool? 
ForceReindex { get; init; } // channel buffer options @@ -85,8 +83,6 @@ public static async Task ApplyAsync( cfg.IndexNumThreads = options.IndexNumThreads.Value; if (options.NoEis.HasValue) cfg.NoElasticInferenceService = options.NoEis.Value; - if (!string.IsNullOrEmpty(options.IndexNamePrefix)) - cfg.IndexNamePrefix = options.IndexNamePrefix; if (options.BufferSize.HasValue) cfg.BufferSize = options.BufferSize.Value; if (options.MaxRetries.HasValue) @@ -117,8 +113,6 @@ public static async Task ApplyAsync( if (options.BootstrapTimeout.HasValue) cfg.BootstrapTimeout = options.BootstrapTimeout.Value; - if (options.NoSemantic.HasValue) - cfg.NoSemantic = options.NoSemantic.Value; if (options.EnableAiEnrichment.HasValue) cfg.EnableAiEnrichment = options.EnableAiEnrichment.Value; if (options.ForceReindex.HasValue) diff --git a/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs b/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs index eae34aeac..4b3a497eb 100644 --- a/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs +++ b/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs @@ -45,6 +45,9 @@ public static TBuilder AddDocumentationServiceDefaults(this TBuilder b _ = builder.Services.AddElasticDocumentationLogging(globalArgs.LogLevel, noConsole: globalArgs.IsMcp); _ = services.AddSingleton(globalArgs); + var endpoints = ElasticsearchEndpointFactory.Create(builder.Configuration); + _ = services.AddSingleton(endpoints); + return builder.AddServiceDefaults(); } diff --git a/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj b/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj index 4357d65ce..ef3d8edd8 100644 --- a/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj +++ b/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj @@ -17,6 +17,7 @@ + diff --git 
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using Elastic.Documentation.Configuration;
using Microsoft.Extensions.Configuration;

namespace Elastic.Documentation.ServiceDefaults;

/// <summary>Centralizes user-secrets + env-var reading for Elasticsearch configuration.</summary>
public static class ElasticsearchEndpointFactory
{
    private const string UserSecretsId = "72f50f33-6fb9-4d08-bff3-39568fe370b3";

    // Separator that precedes the environment segment in legacy index names.
    private const string LegacyIndexMarker = "-docs-";

    // Namespace used when neither the legacy index name nor an environment variable yields one.
    private const string FallbackEnvironment = "dev";

    /// <summary>
    /// Creates <see cref="DocumentationEndpoints"/> from user secrets and environment variables.
    /// </summary>
    /// <remarks>
    /// This method never returns <c>null</c>. When no URL is configured it returns endpoints pointing at
    /// <c>http://localhost:9200</c> without credentials, and the namespace is left at the
    /// <see cref="DocumentationEndpoints"/> default (the environment is not resolved in that case).
    /// </remarks>
    /// <param name="appConfiguration">
    /// Optional application configuration. It is consulted first for <c>DOCUMENTATION_ELASTIC_INDEX</c>;
    /// every other key is read from user secrets and environment variables only.
    /// </param>
    /// <returns>The resolved endpoints.</returns>
    /// <exception cref="UriFormatException">The configured URL is not a valid absolute URI.</exception>
    public static DocumentationEndpoints Create(IConfiguration? appConfiguration = null)
    {
        var configBuilder = new ConfigurationBuilder();
        _ = configBuilder.AddUserSecrets(UserSecretsId);
        _ = configBuilder.AddEnvironmentVariables();
        var config = configBuilder.Build();

        // Each setting is looked up under its "Parameters:*" key first, then under the flat env-var name.
        var url =
            config["Parameters:DocumentationElasticUrl"]
            ?? config["DOCUMENTATION_ELASTIC_URL"];

        var apiKey =
            config["Parameters:DocumentationElasticApiKey"]
            ?? config["DOCUMENTATION_ELASTIC_APIKEY"];

        var password =
            config["Parameters:DocumentationElasticPassword"]
            ?? config["DOCUMENTATION_ELASTIC_PASSWORD"];

        var username =
            config["Parameters:DocumentationElasticUsername"]
            ?? config["DOCUMENTATION_ELASTIC_USERNAME"]
            ?? "elastic";

        if (string.IsNullOrEmpty(url))
        {
            // No URL configured: fall back to a local node and keep the default namespace.
            return new DocumentationEndpoints
            {
                Elasticsearch = new ElasticsearchEndpoint { Uri = new Uri("http://localhost:9200") }
            };
        }

        var endpoint = new ElasticsearchEndpoint
        {
            Uri = new Uri(url),
            ApiKey = apiKey,
            Password = password,
            Username = username
        };

        var ns = ResolveEnvironment(config, appConfiguration);

        return new DocumentationEndpoints { Elasticsearch = endpoint, Namespace = ns };
    }

    /// <summary>
    /// Resolves the environment name using this priority:
    /// 1. DOCUMENTATION_ELASTIC_INDEX — parse old format <c>{variant}-docs-{env}-{timestamp}</c>
    ///    (read from <paramref name="appConfiguration"/> first, then <paramref name="config"/>)
    /// 2. DOTNET_ENVIRONMENT env var
    /// 3. ENVIRONMENT env var
    /// 4. Fallback: "dev"
    /// </summary>
    /// <returns>The lower-cased environment name.</returns>
    private static string ResolveEnvironment(IConfiguration config, IConfiguration? appConfiguration)
    {
        var indexName = appConfiguration?["DOCUMENTATION_ELASTIC_INDEX"]
            ?? config["DOCUMENTATION_ELASTIC_INDEX"];

        if (TryParseLegacyEnvironment(indexName) is { } legacyEnvironment)
            return legacyEnvironment;

        var envVar = config["DOTNET_ENVIRONMENT"]
            ?? config["ENVIRONMENT"];

        return !string.IsNullOrEmpty(envVar) ? envVar.ToLowerInvariant() : FallbackEnvironment;
    }

    /// <summary>
    /// Extracts the environment segment from an old production index name of the form
    /// <c>{variant}-docs-{env}-{timestamp}</c>, e.g. "lexical-docs-edge-2025.10.23.120521" yields "edge".
    /// </summary>
    /// <returns>
    /// The lower-cased environment, or <c>null</c> when the name is empty, has no "-docs-" marker,
    /// has an empty environment segment, or the segment is not followed by a dash and a digit.
    /// </returns>
    private static string? TryParseLegacyEnvironment(string? indexName)
    {
        if (string.IsNullOrEmpty(indexName))
            return null;

        var markerIndex = indexName.IndexOf(LegacyIndexMarker, StringComparison.OrdinalIgnoreCase);
        if (markerIndex < 0)
            return null;

        var afterMarker = indexName[(markerIndex + LegacyIndexMarker.Length)..];
        var dashIndex = afterMarker.IndexOf('-');

        // No further dash: everything after the marker is the environment (may be empty).
        if (dashIndex < 0)
            return afterMarker.Length > 0 ? afterMarker.ToLowerInvariant() : null;

        // A leading dash means the environment segment is empty; do not return the remainder.
        if (dashIndex == 0)
            return null;

        // The dash must be followed by a digit (start of the timestamp). The bounds check guards
        // against names that end with the dash, which previously threw IndexOutOfRangeException.
        var timestampStart = dashIndex + 1;
        if (timestampStart >= afterMarker.Length || !char.IsDigit(afterMarker[timestampStart]))
            return null;

        return afterMarker[..dashIndex].ToLowerInvariant();
    }
}
+ /// + public static string Create(params string[] components) => + Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(string.Join("", components))))[..16].ToLowerInvariant(); +} diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs index e30a4b350..bfbaace5d 100644 --- a/src/Elastic.Documentation/Search/DocumentationDocument.cs +++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs @@ -4,6 +4,7 @@ using System.Text.Json.Serialization; using Elastic.Documentation.AppliesTo; +using Elastic.Mapping; namespace Elastic.Documentation.Search; @@ -12,6 +13,7 @@ public record ParentDocument [JsonPropertyName("title")] public required string Title { get; set; } + [Keyword] [JsonPropertyName("url")] public required string Url { get; set; } } @@ -28,6 +30,7 @@ public record DocumentationDocument [JsonPropertyName("search_title")] public required string SearchTitle { get; set; } + [Keyword(Normalizer = "keyword_normalizer")] [JsonPropertyName("type")] public required string Type { get; set; } = "doc"; @@ -35,6 +38,7 @@ public record DocumentationDocument /// The canonical/primary product for this document (nested object with id and repository). /// Name and version are looked up dynamically by product id. /// + [Object] [JsonPropertyName("product")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public IndexedProduct? Product { get; set; } @@ -42,13 +46,18 @@ public record DocumentationDocument /// /// All related products found during inference (from legacy mappings, applicability, etc.) /// + [Object] [JsonPropertyName("related_products")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public IndexedProduct[]? 
RelatedProducts { get; set; } + [Id] + [Keyword] [JsonPropertyName("url")] public required string Url { get; set; } = string.Empty; + [ContentHash] + [Keyword] [JsonPropertyName("hash")] public string Hash { get; set; } = string.Empty; @@ -58,27 +67,33 @@ public record DocumentationDocument [JsonPropertyName("navigation_table_of_contents")] public int NavigationTableOfContents { get; set; } = 50; //default to a high number so that omission gets penalized. + [Keyword(Normalizer = "keyword_normalizer")] [JsonPropertyName("navigation_section")] public string? NavigationSection { get; set; } /// The date of the batch update this document was part of last. /// This date could be higher than the date_last_updated. + [BatchIndexDate] [JsonPropertyName("batch_index_date")] public DateTimeOffset BatchIndexDate { get; set; } /// The date this document was last updated, + [LastUpdated] + [Timestamp] [JsonPropertyName("last_updated")] public DateTimeOffset LastUpdated { get; set; } [JsonPropertyName("description")] public string? Description { get; set; } + [Text] [JsonPropertyName("headings")] public string[] Headings { get; set; } = []; [JsonPropertyName("links")] public string[] Links { get; set; } = []; + [Nested] [JsonPropertyName("applies_to")] public ApplicableTo? Applies { get; set; } @@ -92,6 +107,7 @@ public record DocumentationDocument [JsonPropertyName("abstract")] public string? Abstract { get; set; } + [Object] [JsonPropertyName("parents")] public ParentDocument[] Parents { get; set; } = []; @@ -105,6 +121,7 @@ public record DocumentationDocument /// Key for enrichment cache lookups. Derived from normalized content + prompt hash. /// Used by enrich processor to join AI-generated fields at index time. /// + [Keyword] [JsonPropertyName("enrichment_key")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? 
EnrichmentKey { get; set; } @@ -112,6 +129,7 @@ public record DocumentationDocument /// /// 3-5 sentences dense with technical entities, API names, and core functionality for vector matching. /// + [Text] [JsonPropertyName("ai_rag_optimized_summary")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? AiRagOptimizedSummary { get; set; } @@ -119,6 +137,7 @@ public record DocumentationDocument /// /// Exactly 5-10 words for a UI tooltip. /// + [Text] [JsonPropertyName("ai_short_summary")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? AiShortSummary { get; set; } @@ -126,6 +145,7 @@ public record DocumentationDocument /// /// A 3-8 word keyword string representing a high-intent user search for this doc. /// + [Keyword] [JsonPropertyName("ai_search_query")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? AiSearchQuery { get; set; } @@ -133,6 +153,7 @@ public record DocumentationDocument /// /// Array of 3-5 specific questions answered by this document. /// + [Text] [JsonPropertyName("ai_questions")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string[]? AiQuestions { get; set; } @@ -140,6 +161,7 @@ public record DocumentationDocument /// /// Array of 2-4 specific use cases this doc helps with. /// + [Text] [JsonPropertyName("ai_use_cases")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string[]? AiUseCases { get; set; } @@ -148,6 +170,7 @@ public record DocumentationDocument /// Hash of the LLM prompt templates used to generate AI fields. /// Used to detect stale enrichments when prompts change. /// + [Keyword] [JsonPropertyName("enrichment_prompt_hash")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? 
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using Elastic.Mapping;
using Elastic.Mapping.Analysis;
using Elastic.Mapping.Mappings;

namespace Elastic.Documentation.Search;

/// <summary>
/// Declares the two documentation index registrations: the default one named from
/// "docs-{type}.lexical-{env}" and configured by <see cref="LexicalConfig"/>, and the "Semantic"
/// variant named from "docs-{type}.semantic-{env}" and configured by <see cref="SemanticConfig"/>.
/// Both use the "yyyy.MM.dd.HHmmss" date pattern.
/// </summary>
[ElasticsearchMappingContext]
[Index(
    NameTemplate = "docs-{type}.lexical-{env}",
    DatePattern = "yyyy.MM.dd.HHmmss",
    Configuration = typeof(LexicalConfig)
)]
[Index(
    NameTemplate = "docs-{type}.semantic-{env}",
    Variant = "Semantic",
    DatePattern = "yyyy.MM.dd.HHmmss",
    Configuration = typeof(SemanticConfig)
)]
public static partial class DocumentationMappingContext;

/// <summary>
/// Mapping configuration for the lexical index: the shared field mappings plus the
/// synonym analyzers on the stripped body field.
/// </summary>
public class LexicalConfig : IConfigureElasticsearch
{
    /// <summary>
    /// Applies <see cref="ConfigureCommonMappings"/> and then sets the index-time and
    /// search-time synonym analyzers on the stripped body field.
    /// </summary>
    public MappingsBuilder ConfigureMappings(MappingsBuilder mappings) =>
        ConfigureCommonMappings(mappings)
            .StrippedBody(f => f
                .Analyzer("synonyms_fixed_analyzer")
                .SearchAnalyzer("synonyms_analyzer")
            );

    /// <summary>
    /// Field mappings shared by the lexical and semantic configurations
    /// (also called from <see cref="SemanticConfig.ConfigureMappings"/>).
    /// The analyzer and normalizer names used here are the ones registered by
    /// <see cref="DocumentationAnalysisFactory.BuildAnalysis"/>.
    /// </summary>
    internal static MappingsBuilder ConfigureCommonMappings(MappingsBuilder m) => m
        // Text fields with custom analyzers and multi-fields
        .SearchTitle(f => f
            .Analyzer("synonyms_fixed_analyzer")
            .SearchAnalyzer("synonyms_analyzer")
            .MultiField("completion", mf => mf.SearchAsYouType()
                .Analyzer("synonyms_fixed_analyzer")
                .SearchAnalyzer("synonyms_analyzer")))
        .Title(f => f
            .SearchAnalyzer("synonyms_analyzer")
            .MultiField("keyword", mf => mf.Keyword().Normalizer("keyword_normalizer"))
            .MultiField("starts_with", mf => mf.Text()
                .Analyzer("starts_with_analyzer")
                .SearchAnalyzer("starts_with_analyzer_search"))
            .MultiField("completion", mf => mf.SearchAsYouType().SearchAnalyzer("synonyms_analyzer")))
        .Abstract(f => f
            .Analyzer("synonyms_fixed_analyzer")
            .SearchAnalyzer("synonyms_analyzer"))
        .Headings(f => f
            .Analyzer("synonyms_fixed_analyzer")
            .SearchAnalyzer("synonyms_analyzer"))
        // JsonIgnore fields — [Text]/[Keyword] attributes handle the type,
        // AddField only needed when custom analyzers are required
        .AddField("ai_rag_optimized_summary", f => f.Text()
            .Analyzer("synonyms_fixed_analyzer")
            .SearchAnalyzer("synonyms_analyzer"))
        // Keyword fields with multi-fields
        .Url(f => f
            .MultiField("match", mf => mf.Text())
            .MultiField("prefix", mf => mf.Text().Analyzer("hierarchy_analyzer")))
        // Rank features — no attribute available, must use AddField
        .AddField("navigation_depth", f => f.RankFeature().PositiveScoreImpact(false))
        .AddField("navigation_table_of_contents", f => f.RankFeature().PositiveScoreImpact(false))
        // Nested applies_to — sub-fields don't match C# structure (custom JsonConverter)
        .AddField("applies_to.type", f => f.Keyword().Normalizer("keyword_normalizer"))
        .AddField("applies_to.sub-type", f => f.Keyword().Normalizer("keyword_normalizer"))
        .AddField("applies_to.lifecycle", f => f.Keyword().Normalizer("keyword_normalizer"))
        .AddField("applies_to.version", f => f.Version())
        // Parent document multi-fields
        .AddField("parents.url", f => f.Keyword()
            .MultiField("match", mf => mf.Text())
            .MultiField("prefix", mf => mf.Text().Analyzer("hierarchy_analyzer")))
        .AddField("parents.title", f => f.Text()
            .SearchAnalyzer("synonyms_analyzer")
            .MultiField("keyword", mf => mf.Keyword()));
}

/// <summary>
/// Mapping configuration for the "Semantic" index variant: everything the lexical
/// configuration maps, plus semantic_text fields bound to two inference endpoints.
/// </summary>
public class SemanticConfig : IConfigureElasticsearch
{
    // Inference endpoint ids referenced by the semantic_text fields below.
    private const string ElserInferenceId = ".elser-2-elastic";
    private const string JinaInferenceId = ".jina-embeddings-v5-text-small";

    /// <summary>
    /// Applies the shared mappings and the stripped body analyzers, then adds a
    /// "*.semantic_text" field (ELSER) and a "*.jina" field (Jina) for each of
    /// title, abstract, ai_rag_optimized_summary, ai_questions and ai_use_cases.
    /// </summary>
    public MappingsBuilder ConfigureMappings(MappingsBuilder mappings) =>
        LexicalConfig.ConfigureCommonMappings(mappings)
            .StrippedBody(s => s
                .Analyzer("synonyms_fixed_analyzer")
                .SearchAnalyzer("synonyms_analyzer")
            )
            // ELSER sparse embeddings
            .AddField("title.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId))
            .AddField("abstract.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId))
            .AddField("ai_rag_optimized_summary.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId))
            .AddField("ai_questions.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId))
            .AddField("ai_use_cases.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId))
            // Jina v5 dense embeddings
            .AddField("title.jina", f => f.SemanticText().InferenceId(JinaInferenceId))
            .AddField("abstract.jina", f => f.SemanticText().InferenceId(JinaInferenceId))
            .AddField("ai_rag_optimized_summary.jina", f => f.SemanticText().InferenceId(JinaInferenceId))
            .AddField("ai_questions.jina", f => f.SemanticText().InferenceId(JinaInferenceId))
            .AddField("ai_use_cases.jina", f => f.SemanticText().InferenceId(JinaInferenceId));
}

/// <summary>
/// Builds analysis settings at runtime (includes synonyms that are loaded from configuration).
/// </summary>
public static class DocumentationAnalysisFactory
{
    /// <summary>
    /// Registers the normalizer, analyzers, char filter, token filters and tokenizers
    /// that the mapping configurations in this file refer to by name.
    /// </summary>
    /// <param name="analysis">The builder to add the analysis components to.</param>
    /// <param name="synonymSetName">Synonym set name used by the updateable "synonyms_filter".</param>
    /// <param name="indexTimeSynonyms">Inline synonym rules used by "synonyms_fixed_filter".</param>
    /// <returns>The builder returned by the last chained call.</returns>
    public static AnalysisBuilder BuildAnalysis(AnalysisBuilder analysis, string synonymSetName, string[] indexTimeSynonyms) => analysis
        .Normalizer("keyword_normalizer", n => n.Custom()
            .CharFilter("strip_non_word_chars")
            .Filters("lowercase", "asciifolding", "trim"))
        .Analyzer("starts_with_analyzer", a => a.Custom()
            .Tokenizer("starts_with_tokenizer")
            .Filter("lowercase"))
        .Analyzer("starts_with_analyzer_search", a => a.Custom()
            .Tokenizer("keyword")
            .Filter("lowercase"))
        // Index-time analyzer: uses the inline synonym list.
        .Analyzer("synonyms_fixed_analyzer", a => a.Custom()
            .Tokenizer("group_tokenizer")
            .Filters("lowercase", "synonyms_fixed_filter", "kstem"))
        // Search-time analyzer: uses the named, updateable synonym set.
        .Analyzer("synonyms_analyzer", a => a.Custom()
            .Tokenizer("group_tokenizer")
            .Filters("lowercase", "synonyms_filter", "kstem"))
        .Analyzer("highlight_analyzer", a => a.Custom()
            .Tokenizer("group_tokenizer")
            .Filters("lowercase", "english_stop"))
        .Analyzer("hierarchy_analyzer", a => a.Custom()
            .Tokenizer("path_tokenizer"))
        // Replaces every non-word character with a space before keyword normalization.
        .CharFilter("strip_non_word_chars", cf => cf.PatternReplace()
            .Pattern(@"\W")
            .Replacement(" "))
        .TokenFilter("synonyms_fixed_filter", tf => tf.SynonymGraph()
            .Synonyms(indexTimeSynonyms))
        .TokenFilter("synonyms_filter", tf => tf.SynonymGraph()
            .SynonymsSet(synonymSetName)
            .Updateable(true))
        .TokenFilter("english_stop", tf => tf.Stop()
            .Stopwords("_english_"))
        .Tokenizer("starts_with_tokenizer", t => t.EdgeNGram()
            .MinGram(1)
            .MaxGram(10)
            .TokenChars("letter", "digit", "symbol", "whitespace"))
        .Tokenizer("group_tokenizer", t => t.CharGroup()
            .TokenizeOnChars("whitespace", ",", ";", "?", "!", "(", ")", "&", "'", "\"", "/", "[", "]", "{", "}"))
        .Tokenizer("path_tokenizer", t => t.PathHierarchy()
            .Delimiter('/'));
}
a/src/Elastic.Documentation/Search/IndexedProduct.cs +++ b/src/Elastic.Documentation/Search/IndexedProduct.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information using System.Text.Json.Serialization; +using Elastic.Mapping; namespace Elastic.Documentation.Search; @@ -15,12 +16,14 @@ public record IndexedProduct /// /// The product ID from products.yml (e.g., "elasticsearch", "kibana", "apm-agent-java") /// + [Keyword(Normalizer = "keyword_normalizer")] [JsonPropertyName("id")] public string? Id { get; init; } /// /// The repository name (e.g., "elasticsearch", "docs-content", "elastic-otel-java") /// + [Keyword(Normalizer = "keyword_normalizer")] [JsonPropertyName("repository")] public string? Repository { get; init; } } diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.Mapping.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.Mapping.cs deleted file mode 100644 index 4e36f7a56..000000000 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.Mapping.cs +++ /dev/null @@ -1,260 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using Elastic.Documentation.Search; -using Elastic.Ingest.Elasticsearch.Catalog; - -namespace Elastic.Markdown.Exporters.Elasticsearch; - -public abstract partial class ElasticsearchIngestChannel - where TChannelOptions : CatalogIndexChannelOptionsBase - where TChannel : CatalogIndexChannel -{ - protected static string CreateMappingSetting(string synonymSetName, string[] synonyms, string? defaultPipeline = null) - { - var indexTimeSynonyms = $"[{string.Join(",", synonyms.Select(r => $"\"{r}\""))}]"; - var pipelineSetting = defaultPipeline is not null ? 
$"\"default_pipeline\": \"{defaultPipeline}\"," : ""; - // language=json - return - $$$""" - { - {{{pipelineSetting}}} - "analysis": { - "normalizer": { - "keyword_normalizer": { - "type": "custom", - "char_filter": ["strip_non_word_chars"], - "filter": ["lowercase", "asciifolding", "trim"] - } - }, - "analyzer": { - "starts_with_analyzer": { - "tokenizer": "starts_with_tokenizer", - "filter": [ "lowercase" ] - }, - "starts_with_analyzer_search": { - "tokenizer": "keyword", - "filter": [ "lowercase" ] - }, - "synonyms_fixed_analyzer": { - "tokenizer": "group_tokenizer", - "filter": [ - "lowercase", - "synonyms_fixed_filter", - "kstem" - ] - }, - "synonyms_analyzer": { - "tokenizer": "group_tokenizer", - "filter": [ - "lowercase", - "synonyms_filter", - "kstem" - ] - }, - "highlight_analyzer": { - "tokenizer": "group_tokenizer", - "filter": [ - "lowercase", - "english_stop" - ] - }, - "hierarchy_analyzer": { "tokenizer": "path_tokenizer" } - }, - "char_filter": { - "strip_non_word_chars": { - "type": "pattern_replace", - "pattern": "\\W", - "replacement": " " - } - }, - "filter": { - "synonyms_fixed_filter": { - "type": "synonym_graph", - "synonyms": {{{indexTimeSynonyms}}} - }, - "synonyms_filter": { - "type": "synonym_graph", - "synonyms_set": "{{{synonymSetName}}}", - "updateable": true - }, - "english_stop": { - "type": "stop", - "stopwords": "_english_" - } - }, - "tokenizer": { - "starts_with_tokenizer": { - "type": "edge_ngram", - "min_gram": 1, - "max_gram": 10, - "token_chars": [ - "letter", - "digit", - "symbol", - "whitespace" - ] - }, - "group_tokenizer": { - "type": "char_group", - "tokenize_on_chars": [ "whitespace", ",", ";", "?", "!", "(", ")", "&", "'", "\"", "/", "[", "]", "{", "}" ] - }, - "path_tokenizer": { - "type": "path_hierarchy", - "delimiter": "/" - } - } - } - } - """; - } - - // language=json - protected static string CreateMapping(string? 
inferenceId) => - $$""" - { - "properties": { - "type": { "type" : "keyword", "normalizer": "keyword_normalizer" }, - "product": { - "type": "object", - "properties": { - "id": { "type": "keyword", "normalizer": "keyword_normalizer" }, - "repository": { "type": "keyword", "normalizer": "keyword_normalizer" } - } - }, - "related_products": { - "type": "object", - "properties": { - "id": { "type": "keyword", "normalizer": "keyword_normalizer" }, - "repository": { "type": "keyword", "normalizer": "keyword_normalizer" } - } - }, - "url": { - "type": "keyword", - "fields": { - "match": { "type": "text" }, - "prefix": { "type": "text", "analyzer" : "hierarchy_analyzer" } - } - }, - "navigation_depth" : { "type" : "rank_feature", "positive_score_impact": false }, - "navigation_table_of_contents" : { "type" : "rank_feature", "positive_score_impact": false }, - "navigation_section" : { "type" : "keyword", "normalizer": "keyword_normalizer" }, - "hidden" : { - "type" : "boolean" - }, - "applies_to" : { - "type" : "nested", - "properties" : { - "type" : { "type" : "keyword", "normalizer": "keyword_normalizer" }, - "sub-type" : { "type" : "keyword", "normalizer": "keyword_normalizer" }, - "lifecycle" : { "type" : "keyword", "normalizer": "keyword_normalizer" }, - "version" : { "type" : "version" } - } - }, - "parents" : { - "type" : "object", - "properties" : { - "url" : { - "type": "keyword", - "fields": { - "match": { "type": "text" }, - "prefix": { "type": "text", "analyzer" : "hierarchy_analyzer" } - } - }, - "title": { - "type": "text", - "search_analyzer": "synonyms_analyzer", - "fields": { - "keyword": { "type": "keyword" } - } - } - } - }, - "hash" : { "type" : "keyword" }, - "enrichment_key" : { "type" : "keyword" }, - "search_title": { - "type": "text", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": "synonyms_analyzer", - "fields": { - "completion": { - "type": "search_as_you_type", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": 
"synonyms_analyzer", - "term_vector": "with_positions_offsets", - "index_options": "offsets" - } - } - }, - "title": { - "type": "text", - "search_analyzer": "synonyms_analyzer", - "fields": { - "keyword": { "type": "keyword", "normalizer": "keyword_normalizer" }, - "starts_with": { "type": "text", "analyzer": "starts_with_analyzer", "search_analyzer": "starts_with_analyzer_search" }, - "completion": { "type": "search_as_you_type", "search_analyzer": "synonyms_analyzer" } - {{(!string.IsNullOrWhiteSpace(inferenceId) ? $$""", "semantic_text": {{{InferenceMapping(inferenceId)}}}""" : "")}} - } - }, - "body": { - "type": "text" - }, - "stripped_body": { - "type": "text", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": "synonyms_analyzer", - "term_vector": "with_positions_offsets" - }, - "headings": { - "type": "text", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": "synonyms_analyzer" - }, - "abstract": { - "type" : "text", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": "synonyms_analyzer", - "fields" : { - {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}} - } - }, - "ai_rag_optimized_summary": { - "type": "text", - "analyzer": "synonyms_fixed_analyzer", - "search_analyzer": "synonyms_analyzer", - "fields": { - {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}} - } - }, - "ai_short_summary": { - "type": "text" - }, - "ai_search_query": { - "type": "keyword" - }, - "ai_questions": { - "type": "text", - "fields": { - {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}} - } - }, - "ai_use_cases": { - "type": "text", - "fields": { - {{(!string.IsNullOrWhiteSpace(inferenceId) ? 
$"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}} - } - }, - "enrichment_prompt_hash": { - "type": "keyword" - } - } - } - """; - - private static string InferenceMapping(string inferenceId) => - $""" - "type": "semantic_text", - "inference_id": "{inferenceId}" - """; -} diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.cs deleted file mode 100644 index 6ff857956..000000000 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.cs +++ /dev/null @@ -1,161 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using Elastic.Channels; -using Elastic.Documentation.Configuration; -using Elastic.Documentation.Diagnostics; -using Elastic.Documentation.Search; -using Elastic.Documentation.Serialization; -using Elastic.Ingest.Elasticsearch.Catalog; -using Elastic.Ingest.Elasticsearch.Indices; -using Elastic.Ingest.Elasticsearch.Semantic; -using Elastic.Transport; -using Microsoft.Extensions.Logging; - -namespace Elastic.Markdown.Exporters.Elasticsearch; - -public class ElasticsearchLexicalIngestChannel( - ILoggerFactory logFactory, - IDiagnosticsCollector collector, - ElasticsearchEndpoint endpoint, - string indexNamespace, - DistributedTransport transport, - string[] indexTimeSynonyms, - string? 
defaultPipeline = null -) - : ElasticsearchIngestChannel, CatalogIndexChannel> - (logFactory, collector, endpoint, transport, o => new(o), t => new(t) - { - BulkOperationIdLookup = d => d.Url, - // hash, last_updated and batch_index_date are all set before the docs are written to the channel - ScriptedHashBulkUpsertLookup = (d, _) => new HashedBulkUpdate("hash", d.Hash, "ctx._source.batch_index_date = params.batch_index_date", - new Dictionary - { - { "batch_index_date", d.BatchIndexDate.ToString("o") } - }), - GetMapping = () => CreateMapping(null), - GetMappingSettings = () => CreateMappingSetting($"docs-{indexNamespace}", indexTimeSynonyms, defaultPipeline), - IndexFormat = - $"{endpoint.IndexNamePrefix.Replace("semantic", "lexical").ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}", - ActiveSearchAlias = $"{endpoint.IndexNamePrefix.Replace("semantic", "lexical").ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}" - }); - -public class ElasticsearchSemanticIngestChannel( - ILoggerFactory logFactory, - IDiagnosticsCollector collector, - ElasticsearchEndpoint endpoint, - string indexNamespace, - DistributedTransport transport, - string[] indexTimeSynonyms, - string? defaultPipeline = null -) - : ElasticsearchIngestChannel, SemanticIndexChannel> - (logFactory, collector, endpoint, transport, o => new(o), t => new(t) - { - BulkOperationIdLookup = d => d.Url, - GetMapping = (inferenceId, _) => CreateMapping(inferenceId), - GetMappingSettings = (_, _) => CreateMappingSetting($"docs-{indexNamespace}", indexTimeSynonyms, defaultPipeline), - IndexFormat = $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}", - ActiveSearchAlias = $"{endpoint.IndexNamePrefix}-{indexNamespace.ToLowerInvariant()}", - IndexNumThreads = endpoint.IndexNumThreads, - SearchNumThreads = endpoint.SearchNumThreads, - InferenceCreateTimeout = TimeSpan.FromMinutes(endpoint.BootstrapTimeout ?? 
4), - UsePreexistingInferenceIds = !endpoint.NoElasticInferenceService, - InferenceId = endpoint.NoElasticInferenceService ? null : ".elser-2-elastic", - SearchInferenceId = endpoint.NoElasticInferenceService ? null : ".elser-2-elastic" - }); - -public abstract partial class ElasticsearchIngestChannel : IDisposable - where TChannelOptions : CatalogIndexChannelOptionsBase - where TChannel : CatalogIndexChannel -{ - private readonly IDiagnosticsCollector _collector; - public TChannel Channel { get; } - private readonly ILogger _logger; - - protected ElasticsearchIngestChannel( - ILoggerFactory logFactory, - IDiagnosticsCollector collector, - ElasticsearchEndpoint endpoint, - DistributedTransport transport, - Func createChannel, - Func createOptions - ) - { - _collector = collector; - _logger = logFactory.CreateLogger>(); - //The max num threads per allocated node, from testing its best to limit our max concurrency - //producing to this number as well - var options = createOptions(transport); - var i = 0; - options.BufferOptions = new BufferOptions - { - OutboundBufferMaxSize = endpoint.BufferSize, - ExportMaxConcurrency = endpoint.IndexNumThreads, - ExportMaxRetries = endpoint.MaxRetries - }; - options.SerializerContext = SourceGenerationContext.Default; - options.ExportBufferCallback = () => - { - var count = Interlocked.Increment(ref i); - _logger.LogInformation("Exported {Count} documents to Elasticsearch index {IndexName}", - count * endpoint.BufferSize, Channel?.IndexName ?? 
string.Format(options.IndexFormat, "latest")); - }; - options.ExportExceptionCallback = e => - { - _logger.LogError(e, "Failed to export document"); - _collector.EmitGlobalError("Elasticsearch export: failed to export document", e); - }; - options.ServerRejectionCallback = items => - { - foreach (var (doc, responseItem) in items) - { - _collector.EmitGlobalError( - $"Server rejection: {responseItem.Status} {responseItem.Error?.Type} {responseItem.Error?.Reason} for document {doc.Url}"); - } - }; - Channel = createChannel(options); - _logger.LogInformation("Created {Channel} Elasticsearch target for indexing", typeof(TChannel).Name); - } - - public async ValueTask StopAsync(Cancel ctx = default) - { - _logger.LogInformation("Waiting to drain all inflight exports to Elasticsearch"); - var drained = await Channel.WaitForDrainAsync(null, ctx); - if (!drained) - _collector.EmitGlobalError("Elasticsearch export: failed to complete indexing in a timely fashion while shutting down"); - - _logger.LogInformation("Refreshing target index {Index}", Channel.IndexName); - var refreshed = await Channel.RefreshAsync(ctx); - if (!refreshed) - _collector.EmitGlobalError($"Refreshing target index {Channel.IndexName} did not complete successfully"); - - _logger.LogInformation("Applying aliases to {Index}", Channel.IndexName); - var swapped = await Channel.ApplyAliasesAsync(ctx); - if (!swapped) - _collector.EmitGlobalError($"${nameof(ElasticsearchMarkdownExporter)} failed to apply aliases to index {Channel.IndexName}"); - - return drained && refreshed && swapped; - } - - public async ValueTask RefreshAsync(Cancel ctx = default) => await Channel.RefreshAsync(ctx); - - public async ValueTask TryWrite(DocumentationDocument document, Cancel ctx = default) - { - if (Channel.TryWrite(document)) - return true; - - if (await Channel.WaitToWriteAsync(ctx)) - return Channel.TryWrite(document); - return false; - } - - public void Dispose() - { - Channel.Complete(); - Channel.Dispose(); - - 
GC.SuppressFinalize(this); - } -} diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs index 00f4d65a0..e69dfc9e5 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs @@ -27,8 +27,8 @@ public partial class ElasticsearchMarkdownExporter /// private void AssignDocumentMetadata(DocumentationDocument doc) { - var semanticHash = _semanticChannel.Channel.ChannelHash; - var lexicalHash = _lexicalChannel.Channel.ChannelHash; + var semanticHash = _semanticTypeContext?.Hash ?? string.Empty; + var lexicalHash = _lexicalTypeContext.Hash; var hash = HashedBulkUpdate.CreateHash(semanticHash, lexicalHash, doc.Url, doc.Type, doc.StrippedBody ?? string.Empty, string.Join(",", doc.Headings.OrderBy(h => h)), doc.SearchTitle ?? string.Empty, @@ -37,8 +37,6 @@ private void AssignDocumentMetadata(DocumentationDocument doc) _fixedSynonymsHash ); doc.Hash = hash; - doc.LastUpdated = _batchIndexDate; - doc.BatchIndexDate = _batchIndexDate; } private static void CommonEnrichments(DocumentationDocument doc, INavigationItem? 
navigationItem) @@ -165,9 +163,7 @@ public async ValueTask ExportAsync(MarkdownExportFileContext fileContext, AssignDocumentMetadata(doc); - if (_indexStrategy == IngestStrategy.Multiplex) - return await _lexicalChannel.TryWrite(doc, ctx) && await _semanticChannel.TryWrite(doc, ctx); - return await _lexicalChannel.TryWrite(doc, ctx); + return await WriteDocumentAsync(doc, ctx); } /// @@ -209,22 +205,10 @@ public async ValueTask FinishExportAsync(IDirectoryInfo outputFolder, Canc AssignDocumentMetadata(doc); - // Write to channels following the multiplex or reindex strategy - if (_indexStrategy == IngestStrategy.Multiplex) + if (!await WriteDocumentAsync(doc, ctx)) { - if (!await _lexicalChannel.TryWrite(doc, ctx) || !await _semanticChannel.TryWrite(doc, ctx)) - { - _logger.LogError("Failed to write OpenAPI document {Url}", doc.Url); - return false; - } - } - else - { - if (!await _lexicalChannel.TryWrite(doc, ctx)) - { - _logger.LogError("Failed to write OpenAPI document {Url}", doc.Url); - return false; - } + _logger.LogError("Failed to write OpenAPI document {Url}", doc.Url); + return false; } } diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs index 5220bfe39..6a6a510dd 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs @@ -4,38 +4,37 @@ using System.Text.Json; using System.Text.Json.Serialization; +using Elastic.Channels; using Elastic.Documentation.Configuration; using Elastic.Documentation.Configuration.Search; using Elastic.Documentation.Configuration.Versions; using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.Search; +using Elastic.Documentation.Serialization; using Elastic.Ingest.Elasticsearch; using Elastic.Ingest.Elasticsearch.Indices; +using Elastic.Mapping; using 
Elastic.Markdown.Exporters.Elasticsearch.Enrichment; using Elastic.Transport; using Microsoft.Extensions.Logging; -using NetEscapades.EnumGenerators; namespace Elastic.Markdown.Exporters.Elasticsearch; -[EnumExtensions] -public enum IngestStrategy { Reindex, Multiplex } - public partial class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposable { private readonly IDiagnosticsCollector _collector; private readonly IDocumentationConfigurationContext _context; private readonly ILogger _logger; - private readonly ElasticsearchLexicalIngestChannel _lexicalChannel; - private readonly ElasticsearchSemanticIngestChannel _semanticChannel; - private readonly ElasticsearchEndpoint _endpoint; - - private readonly DateTimeOffset _batchIndexDate = DateTimeOffset.UtcNow; private readonly DistributedTransport _transport; - private IngestStrategy _indexStrategy; - private readonly string _indexNamespace; - private string _currentLexicalHash = string.Empty; - private string _currentSemanticHash = string.Empty; + private readonly string _buildType; + + // Ingest: orchestrator for dual-index mode + private readonly IncrementalSyncOrchestrator _orchestrator; + + // Type context hashes for document content hash computation + private readonly ElasticsearchTypeContext _lexicalTypeContext; + private readonly ElasticsearchTypeContext _semanticTypeContext; private readonly IReadOnlyDictionary _synonyms; private readonly IReadOnlyCollection _rules; @@ -57,7 +56,7 @@ public ElasticsearchMarkdownExporter( ILoggerFactory logFactory, IDiagnosticsCollector collector, DocumentationEndpoints endpoints, - string indexNamespace, + string buildType, IDocumentationConfigurationContext context ) { @@ -65,14 +64,14 @@ IDocumentationConfigurationContext context _context = context; _logger = logFactory.CreateLogger(); _endpoint = endpoints.Elasticsearch; - _indexStrategy = IngestStrategy.Reindex; - _indexNamespace = indexNamespace; + _buildType = buildType; _versionsConfiguration = 
context.VersionsConfiguration; _synonyms = context.SearchConfiguration.Synonyms; _rules = context.SearchConfiguration.Rules; var es = endpoints.Elasticsearch; _transport = ElasticsearchTransportFactory.Create(es); + _operations = new ElasticsearchOperations(_transport, _logger, collector); string[] fixedSynonyms = ["esql", "data-stream", "data-streams", "machine-learning"]; var indexTimeSynonyms = _synonyms.Aggregate(new List(), (acc, synonym) => @@ -83,15 +82,18 @@ IDocumentationConfigurationContext context }).Where(r => fixedSynonyms.Contains(r.Id)).Select(r => r.Synonyms).ToArray(); _fixedSynonymsHash = HashedBulkUpdate.CreateHash(string.Join(",", indexTimeSynonyms)); - // Use AI enrichment pipeline if enabled - hybrid approach: - // - Cache hits: enrich processor applies fields at index time - // - Cache misses: apply fields inline before indexing var aiPipeline = es.EnableAiEnrichment ? EnrichPolicyManager.PipelineName : null; - _lexicalChannel = new ElasticsearchLexicalIngestChannel(logFactory, collector, es, indexNamespace, _transport, indexTimeSynonyms, aiPipeline); - _semanticChannel = new ElasticsearchSemanticIngestChannel(logFactory, collector, es, indexNamespace, _transport, indexTimeSynonyms, aiPipeline); + var synonymSetName = $"docs-{buildType}"; - // Initialize shared ES operations - _operations = new ElasticsearchOperations(_transport, _logger, collector); + var pipelineSettings = aiPipeline is not null + ? 
new Dictionary { ["index.default_pipeline"] = aiPipeline } + : null; + + _lexicalTypeContext = DocumentationMappingContext.DocumentationDocument.CreateContext(type: buildType) with + { + ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, synonymSetName, indexTimeSynonyms), + IndexSettings = pipelineSettings + }; // Initialize AI enrichment services if enabled if (es.EnableAiEnrichment) @@ -100,81 +102,136 @@ IDocumentationConfigurationContext context _llmClient = new ElasticsearchLlmClient(_transport, logFactory.CreateLogger(), _operations); _enrichPolicyManager = new EnrichPolicyManager(_transport, logFactory.CreateLogger(), _enrichmentCache.IndexName); } + + _semanticTypeContext = DocumentationMappingContext.DocumentationDocumentSemantic.CreateContext(type: buildType) with + { + ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, synonymSetName, indexTimeSynonyms), + IndexSettings = pipelineSettings + }; + + var resolver = DocumentationMappingContext.DocumentationDocument; + _orchestrator = new IncrementalSyncOrchestrator( + _transport, _lexicalTypeContext, _semanticTypeContext, + setBatchIndexDate: resolver.SetBatchIndexDate, + setLastUpdated: resolver.SetLastUpdated) + { + ConfigurePrimary = ConfigureChannelOptions, + ConfigureSecondary = ConfigureChannelOptions, + OnPostComplete = es.EnableAiEnrichment + ? 
async (ctx, _, ct) => await PostCompleteAsync(ctx, ct) + : null + }; + _ = _orchestrator.AddPreBootstrapTask(async (_, ct) => + { + await InitializeEnrichmentAsync(ct); + await PublishSynonymsAsync(ct); + await PublishQueryRulesAsync(ct); + }); + } + + private void ConfigureChannelOptions(IngestChannelOptions options) + { + options.BufferOptions = new BufferOptions + { + OutboundBufferMaxSize = _endpoint.BufferSize, + ExportMaxConcurrency = _endpoint.IndexNumThreads, + ExportMaxRetries = _endpoint.MaxRetries + }; + options.SerializerContext = SourceGenerationContext.Default; + options.ExportExceptionCallback = e => + { + _logger.LogError(e, "Failed to export document"); + _collector.EmitGlobalError("Elasticsearch export: failed to export document", e); + }; + options.ServerRejectionCallback = items => + { + foreach (var (doc, responseItem) in items) + { + _collector.EmitGlobalError( + $"Server rejection: {responseItem.Status} {responseItem.Error?.Type} {responseItem.Error?.Reason} for document {doc.Url}"); + } + }; } /// public async ValueTask StartAsync(Cancel ctx = default) { - // Initialize AI enrichment cache (pre-loads existing hashes into memory) - if (_enrichmentCache is not null && _enrichPolicyManager is not null) - { - _logger.LogInformation("Initializing AI enrichment cache..."); - await _enrichmentCache.InitializeAsync(ctx); - _logger.LogInformation("AI enrichment cache ready with {Count} existing entries", _enrichmentCache.Count); - - // The enrich pipeline must exist before indexing (used as default_pipeline). - // The pipeline's enrich processor requires the .enrich-* index to exist, - // which is created by executing the policy. We execute even with an empty - // cache index - it just creates an empty enrich index that returns no matches. 
- _logger.LogInformation("Setting up enrich policy and pipeline..."); - await _enrichPolicyManager.ExecutePolicyAsync(ctx); - await _enrichPolicyManager.EnsurePipelineExistsAsync(ctx); - } + _ = await _orchestrator.StartAsync(BootstrapMethod.Failure, ctx); + _logger.LogInformation("Orchestrator started with {Strategy} strategy", _orchestrator.Strategy); + } - _currentLexicalHash = await _lexicalChannel.Channel.GetIndexTemplateHashAsync(ctx) ?? string.Empty; - _currentSemanticHash = await _semanticChannel.Channel.GetIndexTemplateHashAsync(ctx) ?? string.Empty; + /// + public async ValueTask StopAsync(Cancel ctx = default) => + _ = await _orchestrator.CompleteAsync(null, ctx); - await PublishSynonymsAsync(ctx); - await PublishQueryRulesAsync(ctx); - _ = await _lexicalChannel.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx); + private async Task InitializeEnrichmentAsync(Cancel ctx) + { + if (_enrichmentCache is null || _enrichPolicyManager is null) + return; - // if the previous hash does not match the current hash, we know already we want to multiplex to a new index - if (_currentLexicalHash != _lexicalChannel.Channel.ChannelHash) - _indexStrategy = IngestStrategy.Multiplex; + _logger.LogInformation("Initializing AI enrichment cache..."); + await _enrichmentCache.InitializeAsync(ctx); + _logger.LogInformation("AI enrichment cache ready with {Count} existing entries", _enrichmentCache.Count); - if (!_endpoint.NoSemantic) - { - var semanticWriteAlias = string.Format(_semanticChannel.Channel.Options.IndexFormat, "latest"); - var semanticIndexAvailable = await _transport.HeadAsync(semanticWriteAlias, ctx); - if (!semanticIndexAvailable.ApiCallDetails.HasSuccessfulStatusCode && _endpoint is { ForceReindex: false, NoSemantic: false }) - { - _indexStrategy = IngestStrategy.Multiplex; - _logger.LogInformation("Index strategy set to multiplex because {SemanticIndex} does not exist, pass --force-reindex to always use reindex", semanticWriteAlias); - } + 
_logger.LogInformation("Setting up enrich policy and pipeline..."); + await _enrichPolicyManager.ExecutePolicyAsync(ctx); + await _enrichPolicyManager.EnsurePipelineExistsAsync(ctx); + } - //try re-use index if we are re-indexing. Multiplex should always go to a new index - _semanticChannel.Channel.Options.TryReuseIndex = _indexStrategy == IngestStrategy.Reindex; - _ = await _semanticChannel.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx); - } + private async Task PostCompleteAsync(OrchestratorContext context, Cancel ctx) => + await ExecuteEnrichPolicyIfNeededAsync(context.SecondaryWriteAlias, ctx); - var lexicalIndexExists = await IndexExists(_lexicalChannel.Channel.IndexName) ? "existing" : "new"; - var semanticIndexExists = await IndexExists(_semanticChannel.Channel.IndexName) ? "existing" : "new"; - if (_currentLexicalHash != _lexicalChannel.Channel.ChannelHash) - { - _indexStrategy = IngestStrategy.Multiplex; - _logger.LogInformation("Multiplexing lexical new index: '{Index}' since current hash on server '{HashCurrent}' does not match new '{HashNew}'", - _lexicalChannel.Channel.IndexName, _currentLexicalHash, _lexicalChannel.Channel.ChannelHash); - } - else - _logger.LogInformation("Targeting {State} lexical: '{Index}'", lexicalIndexExists, _lexicalChannel.Channel.IndexName); + private async ValueTask ExecuteEnrichPolicyIfNeededAsync(string? 
semanticAlias, Cancel ctx) + { + if (_enrichmentCache is null || _enrichPolicyManager is null) + return; - if (!_endpoint.NoSemantic && _currentSemanticHash != _semanticChannel.Channel.ChannelHash) + _logger.LogInformation( + "AI enrichment complete: {CacheHits} cache hits, {Enrichments} enrichments generated (limit: {Limit})", + _cacheHitCount, _enrichmentCount, _enrichmentOptions.MaxNewEnrichmentsPerRun); + + if (_enrichmentCache.Count > 0) { - _indexStrategy = IngestStrategy.Multiplex; - _logger.LogInformation("Multiplexing new index '{Index}' since current hash on server '{HashCurrent}' does not match new '{HashNew}'", - _semanticChannel.Channel.IndexName, _currentSemanticHash, _semanticChannel.Channel.ChannelHash); + _logger.LogInformation("Executing enrich policy to update internal index with {Count} total entries...", _enrichmentCache.Count); + await _enrichPolicyManager.ExecutePolicyAsync(ctx); + + if (semanticAlias is not null) + await BackfillMissingAiFieldsAsync(semanticAlias, ctx); } - else if (!_endpoint.NoSemantic) - _logger.LogInformation("Targeting {State} semantical: '{Index}'", semanticIndexExists, _semanticChannel.Channel.IndexName); + } - _logger.LogInformation("Using {IndexStrategy} to sync lexical index to semantic index", _indexStrategy.ToStringFast(true)); + private async ValueTask BackfillMissingAiFieldsAsync(string semanticAlias, Cancel ctx) + { + if (_enrichmentCache is null || _llmClient is null) + return; + + var currentPromptHash = ElasticsearchLlmClient.PromptHash; + + _logger.LogInformation( + "Starting AI backfill for documents missing or stale AI fields (cache has {CacheCount} entries, prompt hash: {PromptHash})", + _enrichmentCache.Count, currentPromptHash[..8]); - async ValueTask IndexExists(string name) => (await _transport.HeadAsync(name, ctx)).ApiCallDetails.HasSuccessfulStatusCode; + var query = $$""" + { + "query": { + "bool": { + "must": { "exists": { "field": "enrichment_key" } }, + "should": [ + { "bool": { "must_not": { 
"exists": { "field": "ai_questions" } } } }, + { "bool": { "must_not": { "term": { "enrichment_prompt_hash": "{{currentPromptHash}}" } } } } + ], + "minimum_should_match": 1 + } + } + } + """; + + await _operations.UpdateByQueryAsync(semanticAlias, PostData.String(query), EnrichPolicyManager.PipelineName, ctx); } private async Task PublishSynonymsAsync(Cancel ctx) { - var setName = $"docs-{_indexNamespace}"; + var setName = $"docs-{_buildType}"; _logger.LogInformation("Publishing synonym set '{SetName}' to Elasticsearch", setName); var synonymRules = _synonyms.Aggregate(new List(), (acc, synonym) => @@ -198,7 +255,8 @@ private async Task PutSynonyms(SynonymsSet synonymsSet, string setName, Cancel c ctx); if (!response.ApiCallDetails.HasSuccessfulStatusCode) - _collector.EmitGlobalError($"Failed to publish synonym set '{setName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}"); + _collector.EmitGlobalError( + $"Failed to publish synonym set '{setName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}"); else _logger.LogInformation("Successfully published synonym set '{SetName}'.", setName); } @@ -211,7 +269,7 @@ private async Task PublishQueryRulesAsync(Cancel ctx) return; } - var rulesetName = $"docs-ruleset-{_indexNamespace}"; + var rulesetName = $"docs-ruleset-{_buildType}"; _logger.LogInformation("Publishing query ruleset '{RulesetName}' with {Count} rules to Elasticsearch", rulesetName, _rules.Count); var rulesetRules = _rules.Select(r => new QueryRulesetRule @@ -241,241 +299,24 @@ private async Task PutQueryRuleset(QueryRuleset ruleset, string rulesetName, Can ctx); if (!response.ApiCallDetails.HasSuccessfulStatusCode) - _collector.EmitGlobalError($"Failed to publish query ruleset '{rulesetName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}"); + _collector.EmitGlobalError( + $"Failed to publish query ruleset '{rulesetName}'. 
Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}"); else _logger.LogInformation("Successfully published query ruleset '{RulesetName}'.", rulesetName); } - private async ValueTask CountAsync(string index, string body, Cancel ctx = default) + internal async ValueTask WriteDocumentAsync(DocumentationDocument doc, Cancel ctx) { - var countResponse = await _operations.WithRetryAsync( - () => _transport.PostAsync($"/{index}/_count", PostData.String(body), ctx), - $"POST {index}/_count", - ctx); - return countResponse.Body.Get("count"); + if (_orchestrator.TryWrite(doc)) + return true; + _ = await _orchestrator.WaitToWriteAsync(doc, ctx); + return true; } - /// - public async ValueTask StopAsync(Cancel ctx = default) - { - var semanticWriteAlias = string.Format(_semanticChannel.Channel.Options.IndexFormat, "latest"); - var lexicalWriteAlias = string.Format(_lexicalChannel.Channel.Options.IndexFormat, "latest"); - - var stopped = await _lexicalChannel.StopAsync(ctx); - if (!stopped) - throw new Exception($"Failed to stop {_lexicalChannel.GetType().Name}"); - - await QueryIngestStatistics(lexicalWriteAlias, ctx); - - if (_indexStrategy == IngestStrategy.Multiplex) - { - if (!_endpoint.NoSemantic) - _ = await _semanticChannel.StopAsync(ctx); - - // cleanup lexical index of old data - await DoDeleteByQuery(lexicalWriteAlias, ctx); - // need to refresh the lexical index to ensure that the delete by query is available - _ = await _lexicalChannel.RefreshAsync(ctx); - await QueryDocumentCounts(ctx); - // ReSharper disable once ConvertIfStatementToConditionalTernaryExpression - if (_endpoint.NoSemantic) - _logger.LogInformation("Finish indexing {IndexStrategy} strategy", _indexStrategy.ToStringFast(true)); - else - _logger.LogInformation("Finish syncing to semantic in {IndexStrategy} strategy", _indexStrategy.ToStringFast(true)); - return; - } - - if (_endpoint.NoSemantic) - { - _logger.LogInformation("--no-semantic was specified so exiting early 
before reindexing to {Index}", lexicalWriteAlias); - return; - } - - var semanticIndex = _semanticChannel.Channel.IndexName; - // check if the alias exists - var semanticIndexHead = await _transport.HeadAsync(semanticWriteAlias, ctx); - if (!semanticIndexHead.ApiCallDetails.HasSuccessfulStatusCode) - { - _logger.LogInformation("No semantic index exists yet, creating index {Index} for semantic search", semanticIndex); - _ = await _semanticChannel.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx); - var semanticIndexPut = await _transport.PutAsync(semanticIndex, PostData.String("{}"), ctx); - if (!semanticIndexPut.ApiCallDetails.HasSuccessfulStatusCode) - throw new Exception($"Failed to create index {semanticIndex}: {semanticIndexPut}"); - } - var destinationIndex = _semanticChannel.Channel.IndexName; - - _logger.LogInformation("_reindex updates: '{SourceIndex}' => '{DestinationIndex}'", lexicalWriteAlias, destinationIndex); - var request = PostData.String(@" - { - ""dest"": { - ""index"": """ + destinationIndex + @""" - }, - ""source"": { - ""index"": """ + lexicalWriteAlias + @""", - ""size"": 100, - ""query"": { - ""range"": { - ""last_updated"": { - ""gte"": """ + _batchIndexDate.ToString("o") + @""" - } - } - } - } - }"); - await DoReindex(request, lexicalWriteAlias, destinationIndex, "updates", ctx); - - _logger.LogInformation("_reindex deletions: '{SourceIndex}' => '{DestinationIndex}'", lexicalWriteAlias, destinationIndex); - request = PostData.String(@" - { - ""dest"": { - ""index"": """ + destinationIndex + @""" - }, - ""script"": { - ""source"": ""ctx.op = \""delete\"""" - }, - ""source"": { - ""index"": """ + lexicalWriteAlias + @""", - ""size"": 100, - ""query"": { - ""range"": { - ""batch_index_date"": { - ""lt"": """ + _batchIndexDate.ToString("o") + @""" - } - } - } - } - }"); - await DoReindex(request, lexicalWriteAlias, destinationIndex, "deletions", ctx); - - await DoDeleteByQuery(lexicalWriteAlias, ctx); - - _ = await 
_lexicalChannel.Channel.ApplyLatestAliasAsync(ctx); - _ = await _semanticChannel.Channel.ApplyAliasesAsync(ctx); - - _ = await _lexicalChannel.RefreshAsync(ctx); - _ = await _semanticChannel.RefreshAsync(ctx); - - _logger.LogInformation("Finish sync to semantic index using {IndexStrategy} strategy", _indexStrategy.ToStringFast(true)); - await QueryDocumentCounts(ctx); - - // Execute enrich policy so new cache entries are available for next run - await ExecuteEnrichPolicyIfNeededAsync(ctx); - } - - private async ValueTask ExecuteEnrichPolicyIfNeededAsync(Cancel ctx) - { - if (_enrichmentCache is null || _enrichPolicyManager is null) - return; - - _logger.LogInformation( - "AI enrichment complete: {CacheHits} cache hits, {Enrichments} enrichments generated (limit: {Limit})", - _cacheHitCount, _enrichmentCount, _enrichmentOptions.MaxNewEnrichmentsPerRun); - - if (_enrichmentCache.Count > 0) - { - _logger.LogInformation("Executing enrich policy to update internal index with {Count} total entries...", _enrichmentCache.Count); - await _enrichPolicyManager.ExecutePolicyAsync(ctx); - - // Backfill: Apply AI fields to documents that were skipped by hash-based upsert - await BackfillMissingAiFieldsAsync(ctx); - } - } - - private async ValueTask BackfillMissingAiFieldsAsync(Cancel ctx) - { - // Why backfill is needed: - // The exporter uses hash-based upsert - unchanged documents are skipped during indexing. - // These skipped documents never pass through the ingest pipeline, so they miss AI fields. - // This backfill runs _update_by_query with the AI pipeline to enrich those documents. - // - // Additionally, when prompts change, existing documents have stale AI fields. - // We detect this by checking if the document's prompt_hash differs from the current one. - // - // Only backfill the semantic index - it's what the search API uses. - // The lexical index is just an intermediate step for reindexing. 
- if (_endpoint.NoSemantic || _enrichmentCache is null || _llmClient is null) - return; - - var semanticAlias = _semanticChannel.Channel.Options.ActiveSearchAlias; - var currentPromptHash = ElasticsearchLlmClient.PromptHash; - - _logger.LogInformation( - "Starting AI backfill for documents missing or stale AI fields (cache has {CacheCount} entries, prompt hash: {PromptHash})", - _enrichmentCache.Count, currentPromptHash[..8]); - - // Find documents with enrichment_key that either: - // 1. Missing AI fields (never enriched), OR - // 2. Have stale/missing enrichment_prompt_hash (enriched with old prompts) - var query = $$""" - { - "query": { - "bool": { - "must": { "exists": { "field": "enrichment_key" } }, - "should": [ - { "bool": { "must_not": { "exists": { "field": "ai_questions" } } } }, - { "bool": { "must_not": { "term": { "enrichment_prompt_hash": "{{currentPromptHash}}" } } } } - ], - "minimum_should_match": 1 - } - } - } - """; - - await RunBackfillQuery(semanticAlias, query, ctx); - } - - private async ValueTask RunBackfillQuery(string indexAlias, string query, Cancel ctx) => - await _operations.UpdateByQueryAsync(indexAlias, PostData.String(query), EnrichPolicyManager.PipelineName, ctx); - - private async ValueTask QueryIngestStatistics(string lexicalWriteAlias, Cancel ctx) - { - var lexicalSearchAlias = _lexicalChannel.Channel.Options.ActiveSearchAlias; - var updated = await CountAsync(lexicalSearchAlias, $$""" { "query": { "range": { "last_updated": { "gte": "{{_batchIndexDate:o}}" } } } }""", ctx); - var total = await CountAsync(lexicalSearchAlias, $$""" { "query": { "range": { "batch_index_date": { "gte": "{{_batchIndexDate:o}}" } } } }""", ctx); - var deleted = await CountAsync(lexicalSearchAlias, $$""" { "query": { "range": { "batch_index_date": { "lt": "{{_batchIndexDate:o}}" } } } }""", ctx); - - // TODO emit these as metrics - _logger.LogInformation("Exported {Total}, Updated {Updated}, Deleted, {Deleted} documents to {LexicalIndex}", total, 
updated, deleted, lexicalWriteAlias); - _logger.LogInformation("Syncing to semantic index using {IndexStrategy} strategy", _indexStrategy.ToStringFast(true)); - } - - private async ValueTask QueryDocumentCounts(Cancel ctx) - { - var semanticWriteAlias = string.Format(_semanticChannel.Channel.Options.IndexFormat, "latest"); - var lexicalWriteAlias = string.Format(_lexicalChannel.Channel.Options.IndexFormat, "latest"); - var totalLexical = await CountAsync(lexicalWriteAlias, "{}", ctx); - var totalSemantic = await CountAsync(semanticWriteAlias, "{}", ctx); - - // TODO emit these as metrics - _logger.LogInformation("Document counts -> Semantic Index: {TotalSemantic}, Lexical Index: {TotalLexical}", totalSemantic, totalLexical); - } - - private async ValueTask DoDeleteByQuery(string lexicalWriteAlias, Cancel ctx) - { - // delete all documents with batch_index_date < _batchIndexDate - // they weren't part of the current export - _logger.LogInformation("Delete data in '{SourceIndex}' not part of batch date: {Date}", lexicalWriteAlias, _batchIndexDate.ToString("o")); - var query = PostData.String(@" - { - ""query"": { - ""range"": { - ""batch_index_date"": { - ""lt"": """ + _batchIndexDate.ToString("o") + @""" - } - } - } - }"); - await _operations.DeleteByQueryAsync(lexicalWriteAlias, query, ctx); - } - - private async ValueTask DoReindex(PostData request, string lexicalWriteAlias, string semanticWriteAlias, string typeOfSync, Cancel ctx) => - await _operations.ReindexAsync(request, lexicalWriteAlias, semanticWriteAlias, typeOfSync, ctx); - /// public void Dispose() { - _lexicalChannel.Dispose(); - _semanticChannel.Dispose(); + _orchestrator.Dispose(); _llmClient?.Dispose(); GC.SuppressFinalize(this); } diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs index 4f94ae14a..3a3952406 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs 
+++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs @@ -161,22 +161,6 @@ public async Task DeleteByQueryAsync( await PollTaskUntilCompleteAsync(taskId, "_delete_by_query", index, null, ct); } - /// - /// Executes a reindex operation and waits for completion. - /// - public async Task ReindexAsync( - PostData request, - string sourceIndex, - string destIndex, - string operationType, - CancellationToken ct) - { - var url = "/_reindex?wait_for_completion=false&scroll=10m"; - var taskId = await PostAsyncTaskAsync(url, request, $"POST _reindex ({operationType})", ct); - if (taskId is not null) - await PollTaskUntilCompleteAsync(taskId, $"_reindex {operationType}", sourceIndex, destIndex, ct); - } - /// /// Executes an update_by_query operation and waits for completion. /// diff --git a/src/Elastic.Markdown/Exporters/ExporterExtensions.cs b/src/Elastic.Markdown/Exporters/ExporterExtensions.cs index cec7388f3..6deb2a8c0 100644 --- a/src/Elastic.Markdown/Exporters/ExporterExtensions.cs +++ b/src/Elastic.Markdown/Exporters/ExporterExtensions.cs @@ -15,7 +15,7 @@ public static IReadOnlyCollection CreateMarkdownExporters( this IReadOnlySet exportOptions, ILoggerFactory logFactory, IDocumentationConfigurationContext context, - string indexNamespace + string buildType ) { var markdownExporters = new List(4); @@ -24,7 +24,7 @@ string indexNamespace if (exportOptions.Contains(Exporter.Configuration)) markdownExporters.Add(new ConfigurationExporter(logFactory, context.ConfigurationFileProvider, context)); if (exportOptions.Contains(Exporter.Elasticsearch)) - markdownExporters.Add(new ElasticsearchMarkdownExporter(logFactory, context.Collector, context.Endpoints, indexNamespace, context)); + markdownExporters.Add(new ElasticsearchMarkdownExporter(logFactory, context.Collector, context.Endpoints, buildType, context)); return markdownExporters; } } diff --git a/src/api/Elastic.Documentation.Api.App/Program.cs 
b/src/api/Elastic.Documentation.Api.App/Program.cs index 2165dead6..ff255c280 100644 --- a/src/api/Elastic.Documentation.Api.App/Program.cs +++ b/src/api/Elastic.Documentation.Api.App/Program.cs @@ -4,6 +4,7 @@ using Elastic.Documentation.Api.Infrastructure; using Elastic.Documentation.Api.Infrastructure.OpenTelemetry; +using Elastic.Documentation.Configuration; using Elastic.Documentation.Configuration.Assembler; using Elastic.Documentation.Search; using Elastic.Documentation.ServiceDefaults; @@ -82,17 +83,22 @@ static void LogElasticsearchConfiguration(WebApplication app, ILogger logger) { try { - var esOptions = app.Services.GetService(); - if (esOptions != null) + var endpoints = app.Services.GetService(); + if (endpoints is not null) { + var endpoint = endpoints.Elasticsearch; + var searchIndex = DocumentationMappingContext.DocumentationDocumentSemantic + .CreateContext(type: "assembler") + .ResolveReadTarget(); logger.LogInformation( - "Elasticsearch configuration - Url: {Url}, Index: {Index}", - esOptions.Url, - esOptions.IndexName + "Elasticsearch configuration - Url: {Url}, Namespace: {Namespace}, SearchIndex: {SearchIndex}", + endpoint.Uri, + endpoints.Namespace, + searchIndex ); } else - logger.LogWarning("ElasticsearchOptions could not be resolved from DI"); + logger.LogWarning("DocumentationEndpoints could not be resolved from DI"); } catch (Exception ex) { diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs index 345d07ba5..99e20e5ec 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs @@ -7,7 +7,7 @@ using Elastic.Clients.Elasticsearch.Serialization; using Elastic.Documentation.Api.Core; using 
Elastic.Documentation.Api.Core.AskAi; -using Elastic.Documentation.Search; +using Elastic.Documentation.Configuration; using Elastic.Transport; using Microsoft.Extensions.Logging; @@ -25,20 +25,27 @@ public sealed class ElasticsearchAskAiMessageFeedbackGateway : IAskAiMessageFeed private bool _disposed; public ElasticsearchAskAiMessageFeedbackGateway( - ElasticsearchOptions elasticsearchOptions, + DocumentationEndpoints endpoints, AppEnvironment appEnvironment, ILogger logger) { _logger = logger; _indexName = $"ask-ai-message-feedback-{appEnvironment.Current.ToStringFast(true)}"; - _nodePool = new SingleNodePool(new Uri(elasticsearchOptions.Url.Trim())); + var endpoint = endpoints.Elasticsearch; + _nodePool = new SingleNodePool(endpoint.Uri); + var auth = endpoint.ApiKey is { } apiKey + ? (AuthorizationHeader)new ApiKey(apiKey) + : endpoint is { Username: { } username, Password: { } password } + ? new BasicAuthentication(username, password) + : null!; + using var clientSettings = new ElasticsearchClientSettings( _nodePool, sourceSerializer: (_, settings) => new DefaultSourceSerializer(settings, MessageFeedbackJsonContext.Default) ) .DefaultIndex(_indexName) - .Authentication(new ApiKey(elasticsearchOptions.ApiKey)); + .Authentication(auth); _client = new ElasticsearchClient(clientSettings); } diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs index 04db74d4d..8ec6d6d18 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs @@ -58,10 +58,6 @@ public async Task GetParam(string name, bool withDecryption = true, Canc { return GetEnv("DOCUMENTATION_KIBANA_APIKEY"); } - case "docs-elasticsearch-index": - { - return GetEnv("DOCUMENTATION_ELASTIC_INDEX", "semantic-docs-dev-latest"); - } default: { throw new 
ArgumentException($"Parameter '{name}' not found in {nameof(LocalParameterProvider)}"); diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs index b5549d2c0..639367432 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs @@ -25,7 +25,7 @@ public class DocumentGateway( { var normalizedUrl = NormalizeUrl(url); var response = await clientAccessor.Client.SearchAsync(s => s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.SearchIndex) .Query(q => q.Term(t => t.Field(f => f.Url).Value(normalizedUrl))) .Size(1) .Source(sf => sf.Filter(f => f.Includes( @@ -104,7 +104,7 @@ public class DocumentGateway( { var normalizedUrl = NormalizeUrl(url); var response = await clientAccessor.Client.SearchAsync(s => s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.SearchIndex) .Query(q => q.Term(t => t.Field(f => f.Url).Value(normalizedUrl))) .Size(1) .Source(sf => sf.Filter(f => f.Includes( diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs index 6b56eeda3..41615c7c1 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs @@ -5,6 +5,7 @@ using Elastic.Documentation.Api.Infrastructure.OpenTelemetry; using Elastic.Documentation.Assembler.Links; using Elastic.Documentation.Assembler.Mcp; +using Elastic.Documentation.Configuration; using Elastic.Documentation.LinkIndex; using Elastic.Documentation.Links.InboundLinks; using Elastic.Documentation.Mcp.Remote; @@ -141,17 +142,22 @@ static void LogElasticsearchConfiguration(WebApplication app, ILogger logger) { try { - var esOptions = app.Services.GetService(); - if (esOptions != null) + var endpoints = app.Services.GetService(); + if (endpoints is not null) { + var 
endpoint = endpoints.Elasticsearch; + var searchIndex = DocumentationMappingContext.DocumentationDocumentSemantic + .CreateContext(type: "assembler") + .ResolveReadTarget(); logger.LogInformation( - "Elasticsearch configuration - Url: {Url}, Index: {Index}", - esOptions.Url, - esOptions.IndexName + "Elasticsearch configuration - Url: {Url}, Namespace: {Namespace}, SearchIndex: {SearchIndex}", + endpoint.Uri, + endpoints.Namespace, + searchIndex ); } else - logger.LogWarning("ElasticsearchOptions could not be resolved from DI"); + logger.LogWarning("DocumentationEndpoints could not be resolved from DI"); } catch (Exception ex) { diff --git a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json index 15cac94ee..34f00ef13 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json +++ b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json @@ -4,6 +4,5 @@ "Default": "Debug", "Microsoft.AspNetCore": "Information" } - }, - "DOCUMENTATION_ELASTIC_INDEX": "semantic-docs-dev-latest" + } } diff --git a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json index fe7d17f7b..0c208ae91 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json +++ b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json @@ -4,6 +4,5 @@ "Default": "Information", "Microsoft.AspNetCore": "Warning" } - }, - "DOCUMENTATION_ELASTIC_INDEX": "semantic-docs-edge-latest" + } } diff --git a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs index bd0a6e760..941aa47c8 100644 --- a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs +++ b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs @@ -109,7 +109,7 @@ Cancel ctx var 
builder = new AssemblerBuilder(logFactory, assembleContext, navigation, htmlWriter, pathProvider, historyMapper); - await builder.BuildAllAsync(assembleContext.Environment, assembleSources.AssembleSets, exporters, ctx); + await builder.BuildAllAsync(assembleSources.AssembleSets, exporters, ctx); if (exporters.Contains(Exporter.LinkMetadata)) await cloner.WriteLinkRegistrySnapshot(checkoutResult.LinkRegistrySnapshot, ctx); diff --git a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs index e60954294..74b037059 100644 --- a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs +++ b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs @@ -38,7 +38,7 @@ public class AssemblerBuilder( private ILegacyUrlMapper? LegacyUrlMapper { get; } = legacyUrlMapper; - public async Task BuildAllAsync(PublishEnvironment environment, FrozenDictionary assembleSets, IReadOnlySet exportOptions, Cancel ctx) + public async Task BuildAllAsync(FrozenDictionary assembleSets, IReadOnlySet exportOptions, Cancel ctx) { if (context.OutputDirectory.Exists) context.OutputDirectory.Delete(true); @@ -48,7 +48,7 @@ public async Task BuildAllAsync(PublishEnvironment environment, FrozenDictionary var buildTimes = new List<(string Name, int FileCount, TimeSpan Duration)>(); // Create exporters without inferrer - inferrer is created per-repository - var markdownExporters = exportOptions.CreateMarkdownExporters(logFactory, context, environment.Name); + var markdownExporters = exportOptions.CreateMarkdownExporters(logFactory, context, "assembler"); var tasks = markdownExporters.Select(async e => await e.StartAsync(ctx)); await Task.WhenAll(tasks); diff --git a/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs b/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs index 1e44b5c93..323129a40 100644 --- 
a/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs +++ b/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs @@ -32,13 +32,11 @@ ICoreService githubActionsService /// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY /// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME /// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD - /// Index without semantic fields /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 /// Do not use the Elastic Inference Service, bootstrap inference endpoint /// Timeout in minutes for the inference endpoint creation. Defaults: 4 - /// The prefix for the computed index/alias names. Defaults: semantic-docs /// Force reindex strategy to semantic index /// The number of documents to send to ES as part of the bulk. Defaults: 100 /// The number of times failed bulk items should be retried. Defaults: 3 @@ -60,14 +58,12 @@ public async Task Index(IDiagnosticsCollector collector, string? username = null, string? password = null, // inference options - bool? noSemantic = null, bool? enableAiEnrichment = null, int? searchNumThreads = null, int? indexNumThreads = null, bool? noEis = null, int? bootstrapTimeout = null, // index options - string? indexNamePrefix = null, bool? forceReindex = null, // channel buffer options int? 
bufferSize = null, @@ -91,13 +87,11 @@ public async Task Index(IDiagnosticsCollector collector, ApiKey = apiKey, Username = username, Password = password, - NoSemantic = noSemantic, EnableAiEnrichment = enableAiEnrichment, SearchNumThreads = searchNumThreads, IndexNumThreads = indexNumThreads, NoEis = noEis, BootstrapTimeout = bootstrapTimeout, - IndexNamePrefix = indexNamePrefix, ForceReindex = forceReindex, BufferSize = bufferSize, MaxRetries = maxRetries, diff --git a/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs b/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs index c4fcc6c0f..19e060b1b 100644 --- a/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs +++ b/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs @@ -29,13 +29,11 @@ ICoreService githubActionsService /// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY /// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME /// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD - /// Index without semantic fields /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 /// Do not use the Elastic Inference Service, bootstrap inference endpoint /// Timeout in minutes for the inference endpoint creation. Defaults: 4 - /// The prefix for the computed index/alias names. Defaults: semantic-docs /// Force reindex strategy to semantic index /// The number of documents to send to ES as part of the bulk. Defaults: 100 /// The number of times failed bulk items should be retried. Defaults: 3 @@ -57,14 +55,12 @@ public async Task Index(IDiagnosticsCollector collector, string? username = null, string? password = null, // inference options - bool? noSemantic = null, bool? 
enableAiEnrichment = null, int? searchNumThreads = null, int? indexNumThreads = null, bool? noEis = null, int? bootstrapTimeout = null, // index options - string? indexNamePrefix = null, bool? forceReindex = null, // channel buffer options int? bufferSize = null, @@ -88,13 +84,11 @@ public async Task Index(IDiagnosticsCollector collector, ApiKey = apiKey, Username = username, Password = password, - NoSemantic = noSemantic, EnableAiEnrichment = enableAiEnrichment, SearchNumThreads = searchNumThreads, IndexNumThreads = indexNumThreads, NoEis = noEis, BootstrapTimeout = bootstrapTimeout, - IndexNamePrefix = indexNamePrefix, ForceReindex = forceReindex, BufferSize = bufferSize, MaxRetries = maxRetries, @@ -114,6 +108,7 @@ public async Task Index(IDiagnosticsCollector collector, return await Build(collector, fileSystem, metadataOnly: true, strict: false, path: path, output: null, pathPrefix: null, force: true, allowIndexing: null, exporters: exporters, canonicalBaseUrl: null, + skipOpenApi: true, ctx: ctx); } } diff --git a/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs b/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs index b49b02250..7c26b7843 100644 --- a/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs +++ b/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs @@ -4,7 +4,9 @@ using Elastic.Clients.Elasticsearch; using Elastic.Clients.Elasticsearch.Serialization; +using Elastic.Documentation.Configuration; using Elastic.Documentation.Configuration.Search; +using Elastic.Documentation.Search; using Elastic.Transport; namespace Elastic.Documentation.Search.Common; @@ -18,52 +20,49 @@ public class ElasticsearchClientAccessor : IDisposable private readonly ElasticsearchClientSettings _clientSettings; private readonly SingleNodePool _nodePool; public ElasticsearchClient Client { get; } - public ElasticsearchOptions Options { get; } + public 
ElasticsearchEndpoint Endpoint { get; } public SearchConfiguration SearchConfiguration { get; } + public string SearchIndex { get; } public string? RulesetName { get; } public IReadOnlyDictionary SynonymBiDirectional { get; } public IReadOnlyCollection DiminishTerms { get; } public ElasticsearchClientAccessor( - ElasticsearchOptions elasticsearchOptions, - SearchConfiguration searchConfiguration) + DocumentationEndpoints endpoints, + SearchConfiguration searchConfiguration + ) { - Options = elasticsearchOptions; + var endpoint = endpoints.Elasticsearch; + Endpoint = endpoint; SearchConfiguration = searchConfiguration; SynonymBiDirectional = searchConfiguration.SynonymBiDirectional; DiminishTerms = searchConfiguration.DiminishTerms; + + SearchIndex = DocumentationMappingContext.DocumentationDocumentSemantic + .CreateContext(type: "assembler") + .ResolveReadTarget(); + RulesetName = searchConfiguration.Rules.Count > 0 - ? ExtractRulesetName(elasticsearchOptions.IndexName) + ? "docs-ruleset-assembler" : null; - _nodePool = new SingleNodePool(new Uri(elasticsearchOptions.Url.Trim())); + _nodePool = new SingleNodePool(endpoint.Uri); + var auth = endpoint.ApiKey is { } apiKey + ? (AuthorizationHeader)new ApiKey(apiKey) + : endpoint is { Username: { } username, Password: { } password } + ? new BasicAuthentication(username, password) + : null!; + _clientSettings = new ElasticsearchClientSettings( _nodePool, sourceSerializer: (_, settings) => new DefaultSourceSerializer(settings, EsJsonContext.Default) ) - .DefaultIndex(elasticsearchOptions.IndexName) - .Authentication(new ApiKey(elasticsearchOptions.ApiKey)); + .DefaultIndex(SearchIndex) + .Authentication(auth); Client = new ElasticsearchClient(_clientSettings); } - /// - /// Extracts the ruleset name from the index name. 
- /// Index name format: "semantic-docs-{namespace}-latest" -> ruleset: "docs-ruleset-{namespace}" - /// The namespace may contain hyphens (e.g., "codex-engineering"), so we extract everything - /// between the "semantic-docs-" prefix and the "-latest" suffix. - /// - private static string? ExtractRulesetName(string indexName) - { - const string prefix = "semantic-docs-"; - const string suffix = "-latest"; - if (!indexName.StartsWith(prefix, StringComparison.Ordinal) || !indexName.EndsWith(suffix, StringComparison.Ordinal)) - return null; - - var ns = indexName[prefix.Length..^suffix.Length]; - return string.IsNullOrEmpty(ns) ? null : $"docs-ruleset-{ns}"; - } - /// /// Tests connectivity to the Elasticsearch cluster. /// diff --git a/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj b/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj index 27eb575bc..8a350648d 100644 --- a/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj +++ b/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj @@ -17,7 +17,6 @@ - diff --git a/src/services/Elastic.Documentation.Search/ElasticsearchOptions.cs b/src/services/Elastic.Documentation.Search/ElasticsearchOptions.cs deleted file mode 100644 index 9327ae816..000000000 --- a/src/services/Elastic.Documentation.Search/ElasticsearchOptions.cs +++ /dev/null @@ -1,39 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
-// See the LICENSE file in the project root for more information - -using Microsoft.Extensions.Configuration; - -namespace Elastic.Documentation.Search; - -public class ElasticsearchOptions -{ - public ElasticsearchOptions(IConfiguration configuration) - { - // Build a new ConfigurationBuilder to read user secrets - var configBuilder = new ConfigurationBuilder(); - _ = configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - var userSecretsConfig = configBuilder.Build(); - var elasticUrlFromSecret = userSecretsConfig["Parameters:DocumentationElasticUrl"]; - var elasticApiKeyFromSecret = userSecretsConfig["Parameters:DocumentationElasticApiKey"]; - - Url = GetEnv("DOCUMENTATION_ELASTIC_URL", elasticUrlFromSecret); - ApiKey = GetEnv("DOCUMENTATION_ELASTIC_APIKEY", elasticApiKeyFromSecret); - IndexName = configuration["DOCUMENTATION_ELASTIC_INDEX"] ?? "semantic-docs-dev-latest"; - } - - private static string GetEnv(string name, string? defaultValue = null) - { - var value = Environment.GetEnvironmentVariable(name); - if (!string.IsNullOrEmpty(value)) - return value; - if (defaultValue != null) - return defaultValue; - throw new ArgumentException($"Environment variable '{name}' not found."); - } - - // Read from environment variables (set by Terraform from SSM at deploy time) - public string Url { get; } - public string ApiKey { get; } - public string IndexName { get; } -} diff --git a/src/services/Elastic.Documentation.Search/FullSearchGateway.cs b/src/services/Elastic.Documentation.Search/FullSearchGateway.cs index e0ad1825f..00cdc092a 100644 --- a/src/services/Elastic.Documentation.Search/FullSearchGateway.cs +++ b/src/services/Elastic.Documentation.Search/FullSearchGateway.cs @@ -102,7 +102,7 @@ private async Task SearchWithHybridRrf(FullSearchRequest reque var response = await clientAccessor.Client.SearchAsync(s => { _ = s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.SearchIndex) .From(Math.Max(request.PageNumber - 1, 0) * 
request.PageSize) .Size(request.PageSize) .Query(filteredQuery) @@ -170,7 +170,7 @@ private async Task SearchLexicalOnly(FullSearchRequest request var response = await clientAccessor.Client.SearchAsync(s => { _ = s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.SearchIndex) .From(Math.Max(request.PageNumber - 1, 0) * request.PageSize) .Size(request.PageSize) .Query(filteredQuery) diff --git a/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs b/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs index a8c3710ae..dcb20c3e5 100644 --- a/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs +++ b/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs @@ -46,7 +46,7 @@ public async Task SearchImplementation(string query, int var response = await clientAccessor.Client.SearchAsync(s => { _ = s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.SearchIndex) .From(Math.Max(pageNumber - 1, 0) * pageSize) .Size(pageSize) .Query(lexicalQuery) @@ -167,7 +167,7 @@ public async Task ExplainDocumentAsync(string query, string docum { // First, find the document by URL var getDocResponse = await clientAccessor.Client.SearchAsync(s => s - .Indices(clientAccessor.Options.IndexName) + .Indices(clientAccessor.SearchIndex) .Query(q => q.Term(t => t.Field(f => f.Url).Value(documentUrl))) .Size(1), ctx); @@ -186,7 +186,7 @@ public async Task ExplainDocumentAsync(string query, string docum // Now explain why this document matches (or doesn't match) the query var explainResponse = await clientAccessor.Client.ExplainAsync( - clientAccessor.Options.IndexName, documentId, e => e.Query(combinedQuery), ctx); + clientAccessor.SearchIndex, documentId, e => e.Query(combinedQuery), ctx); if (!explainResponse.IsValidResponse) { diff --git a/src/services/Elastic.Documentation.Search/ServicesExtension.cs b/src/services/Elastic.Documentation.Search/ServicesExtension.cs index 9b505c6f2..99e6619ae 
100644 --- a/src/services/Elastic.Documentation.Search/ServicesExtension.cs +++ b/src/services/Elastic.Documentation.Search/ServicesExtension.cs @@ -25,8 +25,6 @@ public static IServiceCollection AddSearchServices(this IServiceCollection servi var logger = GetLogger(services); logger?.LogInformation("Configuring Search services"); - // Shared Elasticsearch options - DI auto-resolves IConfiguration from primary constructor - _ = services.AddSingleton(); _ = services.AddSingleton(); // Navigation Search (autocomplete/navigation search) diff --git a/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs b/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs index 115dda5b9..df29d5666 100644 --- a/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs +++ b/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs @@ -30,12 +30,10 @@ ICoreService githubActionsService /// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY /// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME /// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD - /// Index without semantic fields /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 /// Do not use the Elastic Inference Service, bootstrap inference endpoint - /// The prefix for the computed index/alias names. Defaults: semantic-docs /// Force reindex strategy to semantic index /// Timeout in minutes for the inference endpoint creation. Defaults: 4 /// The number of documents to send to ES as part of the bulk. Defaults: 100 @@ -59,7 +57,6 @@ public async Task Index( string? password = null, // inference options - bool? noSemantic = null, bool? enableAiEnrichment = null, int? searchNumThreads = null, int? 
indexNumThreads = null, @@ -67,7 +64,6 @@ public async Task Index( int? bootstrapTimeout = null, // index options - string? indexNamePrefix = null, bool? forceReindex = null, // channel buffer options @@ -97,9 +93,9 @@ public async Task Index( // endpoint options endpoint, environment, apiKey, username, password, // inference options - noSemantic, enableAiEnrichment, indexNumThreads, searchNumThreads, noEis, bootstrapTimeout, + enableAiEnrichment, indexNumThreads, searchNumThreads, noEis, bootstrapTimeout, // channel and connection options - indexNamePrefix, forceReindex, bufferSize, maxRetries, debugMode, + forceReindex, bufferSize, maxRetries, debugMode, // proxy options proxyAddress, proxyPassword, proxyUsername, // certificate options @@ -110,9 +106,9 @@ static async (s, collector, state, ctx) => await s.Index(collector, state.fs, // endpoint options state.endpoint, state.environment, state.apiKey, state.username, state.password, // inference options - state.noSemantic, state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout, + state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout, // channel and connection options - state.indexNamePrefix, state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode, + state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode, // proxy options state.proxyAddress, state.proxyPassword, state.proxyUsername, // certificate options diff --git a/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs b/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs index 6b70e50de..ae11fbb96 100644 --- a/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs +++ b/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs @@ -36,12 +36,10 @@ ICoreService githubActionsService /// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY /// Elasticsearch username (basic auth), 
alternatively set env DOCUMENTATION_ELASTIC_USERNAME /// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD - /// Index without semantic fields /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 /// Do not use the Elastic Inference Service, bootstrap inference endpoint - /// The prefix for the computed index/alias names. Defaults: semantic-docs /// Force reindex strategy to semantic index /// Timeout in minutes for the inference endpoint creation. Defaults: 4 /// The number of documents to send to ES as part of the bulk. Defaults: 100 @@ -65,7 +63,6 @@ public async Task Index( string? password = null, // inference options - bool? noSemantic = null, bool? enableAiEnrichment = null, int? searchNumThreads = null, int? indexNumThreads = null, @@ -73,7 +70,6 @@ public async Task Index( int? bootstrapTimeout = null, // index options - string? indexNamePrefix = null, bool? 
forceReindex = null, // channel buffer options @@ -134,13 +130,11 @@ public async Task Index( ApiKey = apiKey, Username = username, Password = password, - NoSemantic = noSemantic, EnableAiEnrichment = enableAiEnrichment, SearchNumThreads = searchNumThreads, IndexNumThreads = indexNumThreads, NoEis = noEis, BootstrapTimeout = bootstrapTimeout, - IndexNamePrefix = indexNamePrefix, ForceReindex = forceReindex, BufferSize = bufferSize, MaxRetries = maxRetries, diff --git a/src/tooling/docs-builder/Commands/IndexCommand.cs b/src/tooling/docs-builder/Commands/IndexCommand.cs index efc1af596..ff402ce16 100644 --- a/src/tooling/docs-builder/Commands/IndexCommand.cs +++ b/src/tooling/docs-builder/Commands/IndexCommand.cs @@ -28,11 +28,9 @@ ICoreService githubActionsService /// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY /// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME /// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD - /// Index without semantic fields /// Enable AI enrichment of documents using LLM-generated metadata /// The number of search threads the inference endpoint should use. Defaults: 8 /// The number of index threads the inference endpoint should use. Defaults: 8 - /// The prefix for the computed index/alias names. Defaults: semantic-docs /// Do not use the Elastic Inference Service, bootstrap inference endpoint /// Force reindex strategy to semantic index /// Timeout in minutes for the inference endpoint creation. Defaults: 4 @@ -57,7 +55,6 @@ public async Task Index( string? password = null, // inference options - bool? noSemantic = null, bool? enableAiEnrichment = null, int? searchNumThreads = null, int? indexNumThreads = null, @@ -65,7 +62,6 @@ public async Task Index( int? bootstrapTimeout = null, // index options - string? indexNamePrefix = null, bool? 
forceReindex = null, // channel buffer options @@ -95,9 +91,9 @@ public async Task Index( // endpoint options endpoint, apiKey, username, password, // inference options - noSemantic, enableAiEnrichment, indexNumThreads, noEis, searchNumThreads, bootstrapTimeout, + enableAiEnrichment, indexNumThreads, noEis, searchNumThreads, bootstrapTimeout, // channel and connection options - indexNamePrefix, forceReindex, bufferSize, maxRetries, debugMode, + forceReindex, bufferSize, maxRetries, debugMode, // proxy options proxyAddress, proxyPassword, proxyUsername, // certificate options @@ -108,9 +104,9 @@ static async (s, collector, state, ctx) => await s.Index(collector, state.fs, st // endpoint options state.endpoint, state.apiKey, state.username, state.password, // inference options - state.noSemantic, state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout, + state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout, // channel and connection options - state.indexNamePrefix, state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode, + state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode, // proxy options state.proxyAddress, state.proxyPassword, state.proxyUsername, // certificate options diff --git a/src/tooling/docs-builder/DocumentationTooling.cs b/src/tooling/docs-builder/DocumentationTooling.cs index 298d82cd2..f8b3a952c 100644 --- a/src/tooling/docs-builder/DocumentationTooling.cs +++ b/src/tooling/docs-builder/DocumentationTooling.cs @@ -14,6 +14,7 @@ using Elastic.Documentation.Configuration.Search; using Elastic.Documentation.Configuration.Versions; using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.ServiceDefaults; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Logging; @@ -42,33 +43,10 @@ public static TBuilder 
AddDocumentationToolingDefaults(this TBuilder b return new DiagnosticsCollector([]); return new ConsoleDiagnosticsCollector(logFactory, githubActionsService); }) - .AddSingleton(sp => + .AddSingleton(_ => { - var resolver = sp.GetRequiredService(); - var elasticsearchUri = ResolveServiceEndpoint(resolver, - () => TryEnvVars("http://localhost:9200", "DOCUMENTATION_ELASTIC_URL", "CONNECTIONSTRINGS__ELASTICSEARCH") - ); - var elasticsearchPassword = - elasticsearchUri.UserInfo is { } userInfo && userInfo.Contains(':') - ? userInfo.Split(':')[1] - : TryEnvVarsOptional("DOCUMENTATION_ELASTIC_PASSWORD"); - - var elasticsearchUser = - elasticsearchUri.UserInfo is { } userInfo2 && userInfo2.Contains(':') - ? userInfo2.Split(':')[0] - : TryEnvVars("elastic", "DOCUMENTATION_ELASTIC_USERNAME"); - - var elasticsearchApiKey = TryEnvVarsOptional("DOCUMENTATION_ELASTIC_APIKEY"); - return new DocumentationEndpoints - { - Elasticsearch = new ElasticsearchEndpoint - { - Uri = elasticsearchUri, - Password = elasticsearchPassword, - ApiKey = elasticsearchApiKey, - Username = elasticsearchUser - }, - }; + var endpoints = ElasticsearchEndpointFactory.Create(builder.Configuration); + return endpoints; }) .AddSingleton(sp => { diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs index 18b096b6a..cb9411ec5 100644 --- a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs @@ -7,12 +7,15 @@ using Documentation.Builder.Diagnostics.Console; using Elastic.Documentation.Aspire; using Elastic.Documentation.Configuration; +using Elastic.Documentation.Search; +using Elastic.Documentation.ServiceDefaults; using Elastic.Ingest.Elasticsearch; +using Elastic.Ingest.Elasticsearch.Indices; +using Elastic.Mapping; using 
Elastic.Markdown.Exporters.Elasticsearch; using Elastic.Transport; using Elastic.Transport.Products.Elasticsearch; using FluentAssertions; -using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; @@ -136,61 +139,33 @@ private async ValueTask IsIndexingNeeded() { try { - // Get Elasticsearch configuration from Aspire - var (elasticsearchUrl, apiKey, password, username) = GetElasticsearchConfiguration(); + var endpoints = ElasticsearchEndpointFactory.Create(); - if (string.IsNullOrEmpty(elasticsearchUrl)) - { - Console.WriteLine("No Elasticsearch URL configured, indexing will be performed."); - Connected = false; - return false; - } - - Console.WriteLine($"Checking remote Elasticsearch at {elasticsearchUrl} for existing data..."); - - // Create Elasticsearch endpoint configuration - var endpoint = new ElasticsearchEndpoint - { - Uri = new Uri(elasticsearchUrl), - ApiKey = apiKey, - Username = username, - Password = password - }; - - // Create transport configuration (similar to ElasticsearchMarkdownExporter) - var configuration = new ElasticsearchConfiguration(endpoint.Uri) - { - Authentication = endpoint.ApiKey is { } eApiKey - ? new ApiKey(eApiKey) - : endpoint is { Username: { } eUsername, Password: { } ePassword } - ? 
new BasicAuthentication(eUsername, ePassword) - : null, - EnableHttpCompression = true - }; + var endpoint = endpoints.Elasticsearch; + Console.WriteLine($"Checking remote Elasticsearch at {endpoint.Uri} for existing data..."); - var transport = new DistributedTransport(configuration); + var transport = ElasticsearchTransportFactory.Create(endpoint); Connected = (await transport.HeadAsync("/", TestContext.Current.CancellationToken)).ApiCallDetails.HasSuccessfulStatusCode; // Create a logger factory and diagnostics collector var loggerFactory = fixture.DistributedApplication.Services.GetRequiredService(); var collector = new ConsoleDiagnosticsCollector(loggerFactory); - // Create semantic exporter to check channel hash (index namespace is 'dev' for tests) - using var semanticExporter = new ElasticsearchSemanticIngestChannel( - loggerFactory, - collector, - endpoint, - "dev", // index namespace - transport, - [] - ); + // Create semantic type context to check channel hash (index namespace is 'dev' for tests) + var semanticTypeContext = DocumentationMappingContext.DocumentationDocumentSemantic.CreateContext(type: "assembler") with + { + ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, "docs-assembler", []) + }; + + var options = new IngestChannelOptions(transport, semanticTypeContext); + using var channel = new IngestChannel(options); // Get the current hash from Elasticsearch index template - var currentSemanticHash = await semanticExporter.Channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? string.Empty; + var currentSemanticHash = await channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? 
string.Empty; - // Get the expected channel hash from the semantic exporter - await semanticExporter.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, ctx: TestContext.Current.CancellationToken); - var expectedSemanticHash = semanticExporter.Channel.ChannelHash; + // Get the expected channel hash + _ = await channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, TestContext.Current.CancellationToken); + var expectedSemanticHash = channel.ChannelHash; Console.WriteLine($"Elasticsearch semantic hash: '{currentSemanticHash}'"); Console.WriteLine($"Expected semantic hash: '{expectedSemanticHash}'"); @@ -232,41 +207,6 @@ private async ValueTask ValidateResourceExitCode(string resourceName) Console.WriteLine($"{resourceName} completed with exit code 0"); } - /// - /// Gets Elasticsearch configuration from Aspire parameters and environment. - /// Manually reads user secrets from the aspire project, then falls back to environment variables. - /// - private (string? Url, string? ApiKey, string? Password, string? Username) GetElasticsearchConfiguration() - { - // Manually read user secrets from the aspire project - // UserSecretsId from aspire.csproj: 72f50f33-6fb9-4d08-bff3-39568fe370b3 - var configBuilder = new ConfigurationBuilder(); - configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - var userSecretsConfig = configBuilder.Build(); - - // Get URL - try user secrets first, then Aspire configuration, then environment - var url = userSecretsConfig["Parameters:DocumentationElasticUrl"] - ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticUrl"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL"); - - // Get API Key - try user secrets first, then Aspire configuration, then environment - var apiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] - ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticApiKey"] - ?? 
Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY"); - - // Get password for local Elasticsearch (when using --start-elasticsearch) - var password = userSecretsConfig["Parameters:DocumentationElasticPassword"] ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_PASSWORD"); - - // Get username (defaults to "elastic") - var username = userSecretsConfig["Parameters:DocumentationElasticUsername"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_USERNAME") - ?? "elastic"; - - Console.WriteLine($"Elasticsearch configuration retrieved: URL={url != null}, ApiKey={apiKey != null}, Password={password != null}"); - - return (url, apiKey, password, username); - } - public ValueTask DisposeAsync() { HttpClient?.Dispose(); diff --git a/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj b/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj index 2896294b0..8889bbcba 100644 --- a/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj +++ b/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj @@ -10,6 +10,7 @@ + diff --git a/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs b/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs index a6ec7c7dd..67f8570a6 100644 --- a/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs +++ b/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs @@ -10,7 +10,7 @@ using Elastic.Documentation.Mcp.Remote.Tools; using Elastic.Documentation.Search; using Elastic.Documentation.Search.Common; -using Microsoft.Extensions.Configuration; +using Elastic.Documentation.ServiceDefaults; using Microsoft.Extensions.Logging.Abstractions; namespace Mcp.Remote.IntegrationTests; @@ -25,17 +25,16 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output) /// /// Creates SearchTools with all required 
dependencies. /// - protected (SearchTools? Tools, ElasticsearchClientAccessor? ClientAccessor) CreateSearchTools() + protected (SearchTools Tools, ElasticsearchClientAccessor? ClientAccessor) CreateSearchTools() { var clientAccessor = CreateElasticsearchClientAccessor(); - if (clientAccessor == null) - return (null, null); var productsConfig = CreateProductsConfiguration(); var fullSearchGateway = new FullSearchGateway( clientAccessor, productsConfig, - NullLogger.Instance); + NullLogger.Instance + ); var searchTools = new SearchTools(fullSearchGateway, NullLogger.Instance); return (searchTools, clientAccessor); @@ -44,16 +43,11 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output) /// /// Creates DocumentTools with all required dependencies. /// - protected (DocumentTools? Tools, ElasticsearchClientAccessor? ClientAccessor) CreateDocumentTools() + protected (DocumentTools Tools, ElasticsearchClientAccessor? ClientAccessor) CreateDocumentTools() { var clientAccessor = CreateElasticsearchClientAccessor(); - if (clientAccessor == null) - return (null, null); - - var documentGateway = new DocumentGateway( - clientAccessor, - NullLogger.Instance); + var documentGateway = new DocumentGateway(clientAccessor, NullLogger.Instance); var documentTools = new DocumentTools(documentGateway, NullLogger.Instance); return (documentTools, clientAccessor); } @@ -61,18 +55,12 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output) /// /// Creates CoherenceTools with all required dependencies. /// - protected (CoherenceTools? Tools, ElasticsearchClientAccessor? ClientAccessor) CreateCoherenceTools() + protected (CoherenceTools Tools, ElasticsearchClientAccessor? 
ClientAccessor) CreateCoherenceTools() { var clientAccessor = CreateElasticsearchClientAccessor(); - if (clientAccessor == null) - return (null, null); var productsConfig = CreateProductsConfiguration(); - var fullSearchGateway = new FullSearchGateway( - clientAccessor, - productsConfig, - NullLogger.Instance); - + var fullSearchGateway = new FullSearchGateway(clientAccessor, productsConfig, NullLogger.Instance); var coherenceTools = new CoherenceTools(fullSearchGateway, NullLogger.Instance); return (coherenceTools, clientAccessor); } @@ -80,34 +68,10 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output) /// /// Creates an ElasticsearchClientAccessor using configuration from user secrets and environment variables. /// - private static ElasticsearchClientAccessor? CreateElasticsearchClientAccessor() + private static ElasticsearchClientAccessor CreateElasticsearchClientAccessor() { - var configBuilder = new ConfigurationBuilder(); - configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - configBuilder.AddEnvironmentVariables(); - var config = configBuilder.Build(); - - var elasticsearchUrl = - config["Parameters:DocumentationElasticUrl"] - ?? config["DOCUMENTATION_ELASTIC_URL"]; - - var elasticsearchApiKey = - config["Parameters:DocumentationElasticApiKey"] - ?? 
config["DOCUMENTATION_ELASTIC_APIKEY"]; - - if (string.IsNullOrEmpty(elasticsearchUrl) || string.IsNullOrEmpty(elasticsearchApiKey)) - return null; - - var testConfig = new ConfigurationBuilder() - .AddInMemoryCollection(new Dictionary - { - ["DOCUMENTATION_ELASTIC_URL"] = elasticsearchUrl, - ["DOCUMENTATION_ELASTIC_APIKEY"] = elasticsearchApiKey, - ["DOCUMENTATION_ELASTIC_INDEX"] = "semantic-docs-dev-latest" - }) - .Build(); - - var options = new ElasticsearchOptions(testConfig); + var endpoints = ElasticsearchEndpointFactory.Create(); + var searchConfig = new SearchConfiguration { Synonyms = new Dictionary(), @@ -115,7 +79,7 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output) DiminishTerms = ["plugin", "client", "integration", "glossary"] }; - return new ElasticsearchClientAccessor(options, searchConfig); + return new ElasticsearchClientAccessor(endpoints, searchConfig); } /// diff --git a/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj b/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj index 0754b8a91..4763c5eb0 100644 --- a/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj +++ b/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj @@ -10,6 +10,7 @@ + diff --git a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs index 3d93fd012..34246dc35 100644 --- a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs +++ b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs @@ -5,8 +5,8 @@ using Elastic.Documentation.Configuration.Search; using Elastic.Documentation.Search; using Elastic.Documentation.Search.Common; +using Elastic.Documentation.ServiceDefaults; using FluentAssertions; -using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging.Abstractions; namespace Search.IntegrationTests; @@ -220,37 +220,10 @@ public async Task 
ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() /// /// Creates an ElasticsearchGateway instance using configuration from the distributed application. /// - private NavigationSearchGateway? CreateFindPageGateway() + private NavigationSearchGateway CreateFindPageGateway() { - // Build a new ConfigurationBuilder to read user secrets and environment variables - var configBuilder = new ConfigurationBuilder(); - configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - configBuilder.AddEnvironmentVariables(); - var config = configBuilder.Build(); - - // Get Elasticsearch configuration with fallback chain: user secrets → environment - var elasticsearchUrl = - config["Parameters:DocumentationElasticUrl"] - ?? config["DOCUMENTATION_ELASTIC_URL"]; - - var elasticsearchApiKey = - config["Parameters:DocumentationElasticApiKey"] - ?? config["DOCUMENTATION_ELASTIC_APIKEY"]; - - if (elasticsearchUrl is null or "" || elasticsearchApiKey is null or "") - return null; - - // Create IConfiguration with the required values for ElasticsearchOptions - var testConfig = new ConfigurationBuilder() - .AddInMemoryCollection(new Dictionary - { - ["DOCUMENTATION_ELASTIC_URL"] = elasticsearchUrl, - ["DOCUMENTATION_ELASTIC_APIKEY"] = elasticsearchApiKey, - ["DOCUMENTATION_ELASTIC_INDEX"] = "semantic-docs-dev-latest" - }) - .Build(); + var endpoints = ElasticsearchEndpointFactory.Create(); - var options = new ElasticsearchOptions(testConfig); var searchConfig = new SearchConfiguration { Synonyms = new Dictionary(), @@ -278,7 +251,7 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() DiminishTerms = ["plugin", "client", "integration", "glossary"] }; - var clientAccessor = new ElasticsearchClientAccessor(options, searchConfig); + var clientAccessor = new ElasticsearchClientAccessor(endpoints, searchConfig); return new NavigationSearchGateway(clientAccessor, NullLogger.Instance); } }