diff --git a/Directory.Packages.props b/Directory.Packages.props
index 7ef35e13b..87f4bd481 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -48,7 +48,8 @@
-
+
+
diff --git a/PLAN-rules-config.md b/PLAN-rules-config.md
deleted file mode 100644
index 94fc33bbd..000000000
--- a/PLAN-rules-config.md
+++ /dev/null
@@ -1,194 +0,0 @@
-# Improved Rules Configuration Format
-
-## Context
-
-The `block` section in `changelog.yml` is being redesigned and renamed to `rules:`. Goals:
-1. Explicit matching semantics (`any` vs `all`)
-2. Per-field include/exclude modes for types and areas
-3. Product overrides nested under the section they affect
-4. Clear, scannable log messages prefixed with `[+include]` / `[-exclude]`
-5. No backward compat — error if old `block:` key is seen
-
-## YAML Format
-
-```yaml
-rules:
- # Global match default for multi-valued fields (labels, areas).
- # any (default) = match if ANY item matches the list
- # all = match only if ALL items match the list
- # Inherited by create, publish, and all product overrides.
- # match: any
-
- # Create — controls which PRs generate changelog entries.
- # exclude: block PRs with these labels (comma-separated)
- # include: only create changelogs for PRs with these labels
- # Cannot specify both.
- #
- # create:
- # exclude: ">non-issue, >test"
- # # match: any
- # products:
- # 'elasticsearch, kibana':
- # exclude: ">test"
- # 'cloud-serverless':
- # exclude: "ILM"
-
- # Publish — controls which entries appear in rendered output.
- # exclude_types / include_types
- # exclude_areas / include_areas
- # Cannot mix exclude_ and include_ for the same field.
- #
- # match_areas inherits from rules.match if not specified.
- #
- # publish:
- # # match_areas: any
- # exclude_types:
- # - deprecation
- # - known-issue
- # exclude_areas:
- # - "Internal"
- # products:
- # 'elasticsearch, kibana':
- # exclude_types:
- # - docs
- # 'cloud-serverless':
- # # match_areas: any
- # include_areas:
- # - "Search"
- # - "Monitoring"
-```
-
-### Match inheritance
-
-```
-rules.match (global default, "any" if omitted)
- ├─ create.match → create.products.{id}.match
- └─ publish.match_areas → publish.products.{id}.match_areas
-```
-
-### Area matching examples
-
-| Config | Entry areas: `["Search", "Internal"]` | Result |
-|--------|--------------------------------------|--------|
-| `exclude_areas: [Internal]`, match `any` | "Internal" matches | **Blocked** |
-| `exclude_areas: [Internal]`, match `all` | Not all match | **Allowed** |
-| `include_areas: [Search]`, match `any` | "Search" matches | **Allowed** |
-| `include_areas: [Search]`, match `all` | "Internal" not in list | **Blocked** |
-
-## Error Messages
-
-### Validation (config parsing)
-
-| Condition | Message |
-|-----------|---------|
-| Old `block:` key found | `'block' is no longer supported. Rename to 'rules'. See changelog.example.yml.` |
-| Both `exclude_types` + `include_types` | `rules.publish: cannot have both 'exclude_types' and 'include_types'. Use one or the other.` |
-| Both `exclude_areas` + `include_areas` | Same pattern |
-| Both `create.exclude` + `create.include` | `rules.create: cannot have both 'exclude' and 'include'. Use one or the other.` |
-| Invalid match value | `rules.match: '{value}' is not valid. Use 'any' or 'all'.` |
-| Empty list | `rules.publish.exclude_types: list is empty. Add types or remove the field.` |
-| Unknown product | `rules.publish.products: '{id}' not in available products. Available: {list}` |
-
-### Runtime (create/publish time)
-
-Prefixed with `[-exclude]` or `[+include]` for scanning:
-
-**Create:**
-- `[-exclude] PR #{n}: skipped, label '{label}' matches rules.create.exclude (match: {mode})`
-- `[+include] PR #{n}: created, label '{label}' matches rules.create.include (match: {mode})`
-- `[+include] PR #{n}: skipped, no labels match rules.create.include [{labels}] (match: {mode})`
-- Product: `[-exclude] PR #{n} ({product}): skipped, label '{label}' matches rules.create.products.{product}.exclude`
-
-**Publish:**
-- `[-exclude] PR #{n}: hidden, type '{type}' in rules.publish.exclude_types`
-- `[+include] PR #{n}: hidden, type '{type}' not in rules.publish.include_types`
-- `[-exclude] PR #{n}: hidden, area '{area}' in rules.publish.exclude_areas (match_areas: {mode})`
-- `[-exclude] PR #{n}: hidden, all areas [{areas}] in rules.publish.exclude_areas (match_areas: all)`
-- `[+include] PR #{n}: hidden, areas [{areas}] not in rules.publish.include_areas (match_areas: {mode})`
-- Product: same patterns with `rules.publish.products.{product}.` prefix
-
-## Files to Modify
-
-### 1. Domain model — enums and PublishBlocker
-**`src/Elastic.Documentation/ReleaseNotes/PublishBlocker.cs`**
-
-- Add `MatchMode` enum (`Any`, `All`)
-- Add `FieldMode` enum (`Exclude`, `Include`)
-- Add to `PublishBlocker`: `MatchAreas` (MatchMode), `TypesMode` (FieldMode), `AreasMode` (FieldMode)
-
-### 2. Domain model — rename and restructure BlockConfiguration
-**`src/Elastic.Documentation.Configuration/Changelog/BlockConfiguration.cs`**
-
-Rename to `RulesConfiguration` (or new file). Structure:
-- `RulesConfiguration`: `Match` (MatchMode), `Create` (CreateRules?), `Publish` (PublishRules?)
-- `CreateRules`: `Labels` (list), `Mode` (FieldMode), `Match` (MatchMode?), `ByProduct` (dict)
-- `PublishRules`: `PublishBlocker` fields + `ByProduct` (dict of product-specific `PublishBlocker`s)
-- Delete old `ProductBlockers` record
-
-### 3. Core blocking logic
-**`src/Elastic.Documentation/ReleaseNotes/PublishBlockerExtensions.cs`**
-
-- `MatchesType()`: type vs list
-- `MatchesArea()`: any/all matching
-- `ShouldBlock()`: per-field mode (`Exclude` + match → blocked; `Include` + no match → blocked)
-
-### 4. YAML DTO (CLI path)
-**`src/services/Elastic.Changelog/Serialization/ChangelogConfigurationYaml.cs`**
-
-- Rename `BlockConfigurationYaml` → `RulesConfigurationYaml`
-- New `CreateRulesYaml`: `Exclude`/`Include` (string), `Match` (string?), `Products` (dict)
-- Update `PublishBlockerYaml`: `MatchAreas`, `ExcludeTypes`/`IncludeTypes`, `ExcludeAreas`/`IncludeAreas`, `Products` (dict)
-- Remove old fields (`Types`, `Areas`, `Create` string, root `Product`)
-- Update parent `ChangelogConfigurationYaml`: rename `Block` → `Rules`
-
-### 5. YAML DTO (minimal/inline path)
-**`src/Elastic.Documentation.Configuration/ReleaseNotes/ReleaseNotesSerialization.cs`**
-
-Mirror changes for minimal DTOs. Rename `BlockConfigMinimalDto` → `RulesConfigMinimalDto`, etc.
-
-### 6. Configuration parsing + validation
-**`src/services/Elastic.Changelog/Configuration/ChangelogConfigurationLoader.cs`**
-
-- Detect old `block:` key → emit error
-- Parse `rules:` with new structure
-- Validate mutual exclusivity, match values, empty lists
-- Resolve match inheritance chain
-
-### 7. Create blocking logic
-Find where create labels are checked and update for include/exclude + match + runtime messages.
-
-### 8. Rendering utilities
-**`src/services/Elastic.Changelog/Rendering/ChangelogRenderUtilities.cs`**
-
-- Update for new `publish.products` structure
-- Add `[-exclude]` / `[+include]` prefixed runtime log messages
-
-### 9. Example config
-**`config/changelog.example.yml`** — replace `block:` section with `rules:`.
-
-### 10. All references to BlockConfiguration
-Find and update all code referencing `BlockConfiguration`, `Block`, `ProductBlockers` to use new names.
-
-### 11. Tests
-
-**Unit tests** (`PublishBlockerExtensionsTests.cs`):
-- All mode/match combinations (exclude×any, exclude×all, include×any, include×all)
-- Mixed modes (exclude_types + include_areas)
-- Match inheritance (global → section → product)
-
-**Integration tests** (`BlockConfigurationTests.cs`):
-- New format end-to-end
-- Validation error messages (mutual exclusivity, invalid match, old `block:` key)
-- Product overrides under publish.products and create.products
-- Create include/exclude + match
-- Runtime message prefixes `[-exclude]` / `[+include]`
-
-## Verification
-
-1. New unit tests for all mode/match combinations
-2. Integration tests with new config format
-3. Validation error tests — verify all error messages
-4. Old `block:` key → error test
-5. YAML parsing on both CLI and minimal paths
-6. Runtime messages at create and publish time with correct prefixes
-7. Match inheritance chain works correctly
diff --git a/docs/cli/assembler/assembler-index.md b/docs/cli/assembler/assembler-index.md
index 5d551e4b4..8ae72ddcd 100644
--- a/docs/cli/assembler/assembler-index.md
+++ b/docs/cli/assembler/assembler-index.md
@@ -29,9 +29,6 @@ docs-builder assembler index [options...] [-h|--help] [--version]
`--password` ``
: Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD (optional)
-`--no-semantic` ``
-: Index without semantic fields (optional)
-
`--search-num-threads` ``
: The number of search threads the inference endpoint should use. Defaults: 8 (optional)
diff --git a/docs/cli/docset/index-command.md b/docs/cli/docset/index-command.md
index 32aa3a25b..00e28cf1c 100644
--- a/docs/cli/docset/index-command.md
+++ b/docs/cli/docset/index-command.md
@@ -25,9 +25,6 @@ docs-builder index [options...] [-h|--help] [--version]
`--password` ``
: Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD (optional)
-`--no-semantic` ``
-: Index without semantic fields (optional)
-
`--search-num-threads` ``
: The number of search threads the inference endpoint should use. Defaults: 8 (optional)
diff --git a/src/Elastic.Codex/Building/CodexBuildService.cs b/src/Elastic.Codex/Building/CodexBuildService.cs
index 8db6350d2..a502ff52c 100644
--- a/src/Elastic.Codex/Building/CodexBuildService.cs
+++ b/src/Elastic.Codex/Building/CodexBuildService.cs
@@ -85,7 +85,7 @@ public async Task BuildAll(
if (exporters is not null && buildContexts.Count > 0)
{
var firstContext = buildContexts[0].BuildContext;
- sharedExporters = exporters.CreateMarkdownExporters(logFactory, firstContext, context.IndexNamespace).ToArray();
+ sharedExporters = exporters.CreateMarkdownExporters(logFactory, firstContext, "codex").ToArray();
var startTasks = sharedExporters.Select(async e => await e.StartAsync(ctx));
await Task.WhenAll(startTasks);
}
diff --git a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs
index 367fe844b..0e6ee09ce 100644
--- a/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs
+++ b/src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs
@@ -9,6 +9,7 @@ namespace Elastic.Documentation.Configuration;
public class DocumentationEndpoints
{
public required ElasticsearchEndpoint Elasticsearch { get; init; }
+ public string Namespace { get; set; } = "dev";
}
public class ElasticsearchEndpoint
@@ -25,9 +26,6 @@ public class ElasticsearchEndpoint
public int IndexNumThreads { get; set; } = 4; // Reduced for Serverless rate limits
public bool NoElasticInferenceService { get; set; }
- // index options
- public string IndexNamePrefix { get; set; } = "semantic-docs";
-
// channel buffer options
public int BufferSize { get; set; } = 50; // Reduced for Serverless rate limits
public int MaxRetries { get; set; } = 5; // Increased for 429 retries
@@ -43,7 +41,6 @@ public class ElasticsearchEndpoint
public X509Certificate? Certificate { get; set; }
public bool CertificateIsNotRoot { get; set; }
public int? BootstrapTimeout { get; set; }
- public bool NoSemantic { get; set; }
public bool ForceReindex { get; set; }
///
diff --git a/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs b/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs
index e8031368c..4bd1586c1 100644
--- a/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs
+++ b/src/Elastic.Documentation.Configuration/ElasticsearchEndpointConfigurator.cs
@@ -21,7 +21,6 @@ public record ElasticsearchIndexOptions
public string? Password { get; init; }
// inference options
- public bool? NoSemantic { get; init; }
public bool? EnableAiEnrichment { get; init; }
public int? SearchNumThreads { get; init; }
public int? IndexNumThreads { get; init; }
@@ -29,7 +28,6 @@ public record ElasticsearchIndexOptions
public int? BootstrapTimeout { get; init; }
// index options
- public string? IndexNamePrefix { get; init; }
public bool? ForceReindex { get; init; }
// channel buffer options
@@ -85,8 +83,6 @@ public static async Task ApplyAsync(
cfg.IndexNumThreads = options.IndexNumThreads.Value;
if (options.NoEis.HasValue)
cfg.NoElasticInferenceService = options.NoEis.Value;
- if (!string.IsNullOrEmpty(options.IndexNamePrefix))
- cfg.IndexNamePrefix = options.IndexNamePrefix;
if (options.BufferSize.HasValue)
cfg.BufferSize = options.BufferSize.Value;
if (options.MaxRetries.HasValue)
@@ -117,8 +113,6 @@ public static async Task ApplyAsync(
if (options.BootstrapTimeout.HasValue)
cfg.BootstrapTimeout = options.BootstrapTimeout.Value;
- if (options.NoSemantic.HasValue)
- cfg.NoSemantic = options.NoSemantic.Value;
if (options.EnableAiEnrichment.HasValue)
cfg.EnableAiEnrichment = options.EnableAiEnrichment.Value;
if (options.ForceReindex.HasValue)
diff --git a/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs b/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs
index eae34aeac..4b3a497eb 100644
--- a/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs
+++ b/src/Elastic.Documentation.ServiceDefaults/AppDefaultsExtensions.cs
@@ -45,6 +45,9 @@ public static TBuilder AddDocumentationServiceDefaults(this TBuilder b
_ = builder.Services.AddElasticDocumentationLogging(globalArgs.LogLevel, noConsole: globalArgs.IsMcp);
_ = services.AddSingleton(globalArgs);
+ var endpoints = ElasticsearchEndpointFactory.Create(builder.Configuration);
+ _ = services.AddSingleton(endpoints);
+
return builder.AddServiceDefaults();
}
diff --git a/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj b/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj
index 4357d65ce..ef3d8edd8 100644
--- a/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj
+++ b/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj
@@ -17,6 +17,7 @@
+
diff --git a/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs b/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs
new file mode 100644
index 000000000..86def5bbc
--- /dev/null
+++ b/src/Elastic.Documentation.ServiceDefaults/ElasticsearchEndpointFactory.cs
@@ -0,0 +1,98 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using Elastic.Documentation.Configuration;
+using Microsoft.Extensions.Configuration;
+
+namespace Elastic.Documentation.ServiceDefaults;
+
+/// Centralizes user-secrets + env-var reading for Elasticsearch configuration.
+public static class ElasticsearchEndpointFactory
+{
+ private const string UserSecretsId = "72f50f33-6fb9-4d08-bff3-39568fe370b3";
+
+ ///
+ /// Creates from user secrets and environment variables.
+ /// Falls back to a default http://localhost:9200 endpoint when no URL is configured.
+ ///
+ public static DocumentationEndpoints Create(IConfiguration? appConfiguration = null)
+ {
+ var configBuilder = new ConfigurationBuilder();
+ _ = configBuilder.AddUserSecrets(UserSecretsId);
+ _ = configBuilder.AddEnvironmentVariables();
+ var config = configBuilder.Build();
+
+ var url =
+ config["Parameters:DocumentationElasticUrl"]
+ ?? config["DOCUMENTATION_ELASTIC_URL"];
+
+ var apiKey =
+ config["Parameters:DocumentationElasticApiKey"]
+ ?? config["DOCUMENTATION_ELASTIC_APIKEY"];
+
+ var password =
+ config["Parameters:DocumentationElasticPassword"]
+ ?? config["DOCUMENTATION_ELASTIC_PASSWORD"];
+
+ var username =
+ config["Parameters:DocumentationElasticUsername"]
+ ?? config["DOCUMENTATION_ELASTIC_USERNAME"]
+ ?? "elastic";
+
+ if (string.IsNullOrEmpty(url))
+ {
+ return new DocumentationEndpoints
+ {
+ Elasticsearch = new ElasticsearchEndpoint { Uri = new Uri("http://localhost:9200") }
+ };
+ }
+
+ var endpoint = new ElasticsearchEndpoint
+ {
+ Uri = new Uri(url),
+ ApiKey = apiKey,
+ Password = password,
+ Username = username
+ };
+
+ var ns = ResolveEnvironment(config, appConfiguration);
+
+ return new DocumentationEndpoints { Elasticsearch = endpoint, Namespace = ns };
+ }
+
+ ///
+ /// Resolves the environment name using this priority:
+ /// 1. DOCUMENTATION_ELASTIC_INDEX env var — parse old format {variant}-docs-{env}-{timestamp}
+ /// 2. DOTNET_ENVIRONMENT env var
+ /// 3. ENVIRONMENT env var
+ /// 4. Fallback: "dev"
+ ///
+ private static string ResolveEnvironment(IConfiguration config, IConfiguration? appConfiguration)
+ {
+ var indexName = appConfiguration?["DOCUMENTATION_ELASTIC_INDEX"]
+ ?? config["DOCUMENTATION_ELASTIC_INDEX"];
+
+ if (!string.IsNullOrEmpty(indexName))
+ {
+ // Old production format: {variant}-docs-{env}-{timestamp}
+ // e.g. "lexical-docs-edge-2025.10.23.120521"
+ // Extract the environment segment after "docs-" and before the next "-" followed by digits.
+ const string marker = "-docs-";
+ var markerIndex = indexName.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
+ if (markerIndex >= 0)
+ {
+ var afterMarker = indexName[(markerIndex + marker.Length)..];
+ var dashIndex = afterMarker.IndexOf('-');
+ var env = dashIndex > 0 ? afterMarker[..dashIndex] : afterMarker;
+ if (!string.IsNullOrEmpty(env) && (dashIndex < 0 || (dashIndex + 1 < afterMarker.Length && char.IsDigit(afterMarker[dashIndex + 1]))))
+ return env.ToLowerInvariant();
+ }
+ }
+
+ var envVar = config["DOTNET_ENVIRONMENT"]
+ ?? config["ENVIRONMENT"];
+
+ return !string.IsNullOrEmpty(envVar) ? envVar.ToLowerInvariant() : "dev";
+ }
+}
diff --git a/src/Elastic.Documentation/Elastic.Documentation.csproj b/src/Elastic.Documentation/Elastic.Documentation.csproj
index 99b59c073..fbc2f8c72 100644
--- a/src/Elastic.Documentation/Elastic.Documentation.csproj
+++ b/src/Elastic.Documentation/Elastic.Documentation.csproj
@@ -9,6 +9,7 @@
+
diff --git a/src/Elastic.Documentation/Search/ContentHash.cs b/src/Elastic.Documentation/Search/ContentHash.cs
new file mode 100644
index 000000000..17eb2e7ae
--- /dev/null
+++ b/src/Elastic.Documentation/Search/ContentHash.cs
@@ -0,0 +1,19 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using System.Security.Cryptography;
+using System.Text;
+
+namespace Elastic.Documentation.Search;
+
+/// Creates a short hex hash from one or more string components.
+public static class ContentHash
+{
+ ///
+ /// Concatenates all components, computes SHA-256, and returns the first 16 hex characters (lowercased).
+ /// Compatible with HashedBulkUpdate.CreateHash.
+ ///
+ public static string Create(params string[] components) =>
+ Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(string.Join("", components))))[..16].ToLowerInvariant();
+}
diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs
index e30a4b350..bfbaace5d 100644
--- a/src/Elastic.Documentation/Search/DocumentationDocument.cs
+++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs
@@ -4,6 +4,7 @@
using System.Text.Json.Serialization;
using Elastic.Documentation.AppliesTo;
+using Elastic.Mapping;
namespace Elastic.Documentation.Search;
@@ -12,6 +13,7 @@ public record ParentDocument
[JsonPropertyName("title")]
public required string Title { get; set; }
+ [Keyword]
[JsonPropertyName("url")]
public required string Url { get; set; }
}
@@ -28,6 +30,7 @@ public record DocumentationDocument
[JsonPropertyName("search_title")]
public required string SearchTitle { get; set; }
+ [Keyword(Normalizer = "keyword_normalizer")]
[JsonPropertyName("type")]
public required string Type { get; set; } = "doc";
@@ -35,6 +38,7 @@ public record DocumentationDocument
/// The canonical/primary product for this document (nested object with id and repository).
/// Name and version are looked up dynamically by product id.
///
+ [Object]
[JsonPropertyName("product")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public IndexedProduct? Product { get; set; }
@@ -42,13 +46,18 @@ public record DocumentationDocument
///
/// All related products found during inference (from legacy mappings, applicability, etc.)
///
+ [Object]
[JsonPropertyName("related_products")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public IndexedProduct[]? RelatedProducts { get; set; }
+ [Id]
+ [Keyword]
[JsonPropertyName("url")]
public required string Url { get; set; } = string.Empty;
+ [ContentHash]
+ [Keyword]
[JsonPropertyName("hash")]
public string Hash { get; set; } = string.Empty;
@@ -58,27 +67,33 @@ public record DocumentationDocument
[JsonPropertyName("navigation_table_of_contents")]
public int NavigationTableOfContents { get; set; } = 50; //default to a high number so that omission gets penalized.
+ [Keyword(Normalizer = "keyword_normalizer")]
[JsonPropertyName("navigation_section")]
public string? NavigationSection { get; set; }
/// The date of the batch update this document was part of last.
/// This date could be higher than the date_last_updated.
+ [BatchIndexDate]
[JsonPropertyName("batch_index_date")]
public DateTimeOffset BatchIndexDate { get; set; }
/// The date this document was last updated,
+ [LastUpdated]
+ [Timestamp]
[JsonPropertyName("last_updated")]
public DateTimeOffset LastUpdated { get; set; }
[JsonPropertyName("description")]
public string? Description { get; set; }
+ [Text]
[JsonPropertyName("headings")]
public string[] Headings { get; set; } = [];
[JsonPropertyName("links")]
public string[] Links { get; set; } = [];
+ [Nested]
[JsonPropertyName("applies_to")]
public ApplicableTo? Applies { get; set; }
@@ -92,6 +107,7 @@ public record DocumentationDocument
[JsonPropertyName("abstract")]
public string? Abstract { get; set; }
+ [Object]
[JsonPropertyName("parents")]
public ParentDocument[] Parents { get; set; } = [];
@@ -105,6 +121,7 @@ public record DocumentationDocument
/// Key for enrichment cache lookups. Derived from normalized content + prompt hash.
/// Used by enrich processor to join AI-generated fields at index time.
///
+ [Keyword]
[JsonPropertyName("enrichment_key")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? EnrichmentKey { get; set; }
@@ -112,6 +129,7 @@ public record DocumentationDocument
///
/// 3-5 sentences dense with technical entities, API names, and core functionality for vector matching.
///
+ [Text]
[JsonPropertyName("ai_rag_optimized_summary")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? AiRagOptimizedSummary { get; set; }
@@ -119,6 +137,7 @@ public record DocumentationDocument
///
/// Exactly 5-10 words for a UI tooltip.
///
+ [Text]
[JsonPropertyName("ai_short_summary")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? AiShortSummary { get; set; }
@@ -126,6 +145,7 @@ public record DocumentationDocument
///
/// A 3-8 word keyword string representing a high-intent user search for this doc.
///
+ [Keyword]
[JsonPropertyName("ai_search_query")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? AiSearchQuery { get; set; }
@@ -133,6 +153,7 @@ public record DocumentationDocument
///
/// Array of 3-5 specific questions answered by this document.
///
+ [Text]
[JsonPropertyName("ai_questions")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string[]? AiQuestions { get; set; }
@@ -140,6 +161,7 @@ public record DocumentationDocument
///
/// Array of 2-4 specific use cases this doc helps with.
///
+ [Text]
[JsonPropertyName("ai_use_cases")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string[]? AiUseCases { get; set; }
@@ -148,6 +170,7 @@ public record DocumentationDocument
/// Hash of the LLM prompt templates used to generate AI fields.
/// Used to detect stale enrichments when prompts change.
///
+ [Keyword]
[JsonPropertyName("enrichment_prompt_hash")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? EnrichmentPromptHash { get; set; }
diff --git a/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs
new file mode 100644
index 000000000..0e1acc17f
--- /dev/null
+++ b/src/Elastic.Documentation/Search/DocumentationMappingConfig.cs
@@ -0,0 +1,150 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using Elastic.Mapping;
+using Elastic.Mapping.Analysis;
+using Elastic.Mapping.Mappings;
+
+namespace Elastic.Documentation.Search;
+
+[ElasticsearchMappingContext]
+[Index(
+ NameTemplate = "docs-{type}.lexical-{env}",
+ DatePattern = "yyyy.MM.dd.HHmmss",
+ Configuration = typeof(LexicalConfig)
+)]
+[Index(
+ NameTemplate = "docs-{type}.semantic-{env}",
+ Variant = "Semantic",
+ DatePattern = "yyyy.MM.dd.HHmmss",
+ Configuration = typeof(SemanticConfig)
+)]
+public static partial class DocumentationMappingContext;
+
+public class LexicalConfig : IConfigureElasticsearch
+{
+ public MappingsBuilder ConfigureMappings(MappingsBuilder mappings) =>
+ ConfigureCommonMappings(mappings)
+ .StrippedBody(f => f
+ .Analyzer("synonyms_fixed_analyzer")
+ .SearchAnalyzer("synonyms_analyzer")
+ );
+
+ internal static MappingsBuilder ConfigureCommonMappings(MappingsBuilder m) => m
+ // Text fields with custom analyzers and multi-fields
+ .SearchTitle(f => f
+ .Analyzer("synonyms_fixed_analyzer")
+ .SearchAnalyzer("synonyms_analyzer")
+ .MultiField("completion", mf => mf.SearchAsYouType()
+ .Analyzer("synonyms_fixed_analyzer")
+ .SearchAnalyzer("synonyms_analyzer")))
+ .Title(f => f
+ .SearchAnalyzer("synonyms_analyzer")
+ .MultiField("keyword", mf => mf.Keyword().Normalizer("keyword_normalizer"))
+ .MultiField("starts_with", mf => mf.Text()
+ .Analyzer("starts_with_analyzer")
+ .SearchAnalyzer("starts_with_analyzer_search"))
+ .MultiField("completion", mf => mf.SearchAsYouType().SearchAnalyzer("synonyms_analyzer")))
+ .Abstract(f => f
+ .Analyzer("synonyms_fixed_analyzer")
+ .SearchAnalyzer("synonyms_analyzer"))
+ .Headings(f => f
+ .Analyzer("synonyms_fixed_analyzer")
+ .SearchAnalyzer("synonyms_analyzer"))
+ // JsonIgnore fields — [Text]/[Keyword] attributes handle the type,
+ // AddField only needed when custom analyzers are required
+ .AddField("ai_rag_optimized_summary", f => f.Text()
+ .Analyzer("synonyms_fixed_analyzer")
+ .SearchAnalyzer("synonyms_analyzer"))
+ // Keyword fields with multi-fields
+ .Url(f => f
+ .MultiField("match", mf => mf.Text())
+ .MultiField("prefix", mf => mf.Text().Analyzer("hierarchy_analyzer")))
+ // Rank features — no attribute available, must use AddField
+ .AddField("navigation_depth", f => f.RankFeature().PositiveScoreImpact(false))
+ .AddField("navigation_table_of_contents", f => f.RankFeature().PositiveScoreImpact(false))
+ // Nested applies_to — sub-fields don't match C# structure (custom JsonConverter)
+ .AddField("applies_to.type", f => f.Keyword().Normalizer("keyword_normalizer"))
+ .AddField("applies_to.sub-type", f => f.Keyword().Normalizer("keyword_normalizer"))
+ .AddField("applies_to.lifecycle", f => f.Keyword().Normalizer("keyword_normalizer"))
+ .AddField("applies_to.version", f => f.Version())
+ // Parent document multi-fields
+ .AddField("parents.url", f => f.Keyword()
+ .MultiField("match", mf => mf.Text())
+ .MultiField("prefix", mf => mf.Text().Analyzer("hierarchy_analyzer")))
+ .AddField("parents.title", f => f.Text()
+ .SearchAnalyzer("synonyms_analyzer")
+ .MultiField("keyword", mf => mf.Keyword()));
+}
+
+public class SemanticConfig : IConfigureElasticsearch
+{
+ private const string ElserInferenceId = ".elser-2-elastic";
+ private const string JinaInferenceId = ".jina-embeddings-v5-text-small";
+
+ public MappingsBuilder ConfigureMappings(MappingsBuilder mappings) =>
+ LexicalConfig.ConfigureCommonMappings(mappings)
+ .StrippedBody(s => s
+ .Analyzer("synonyms_fixed_analyzer")
+ .SearchAnalyzer("synonyms_analyzer")
+ )
+ // ELSER sparse embeddings
+ .AddField("title.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId))
+ .AddField("abstract.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId))
+ .AddField("ai_rag_optimized_summary.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId))
+ .AddField("ai_questions.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId))
+ .AddField("ai_use_cases.semantic_text", f => f.SemanticText().InferenceId(ElserInferenceId))
+ // Jina v5 dense embeddings
+ .AddField("title.jina", f => f.SemanticText().InferenceId(JinaInferenceId))
+ .AddField("abstract.jina", f => f.SemanticText().InferenceId(JinaInferenceId))
+ .AddField("ai_rag_optimized_summary.jina", f => f.SemanticText().InferenceId(JinaInferenceId))
+ .AddField("ai_questions.jina", f => f.SemanticText().InferenceId(JinaInferenceId))
+ .AddField("ai_use_cases.jina", f => f.SemanticText().InferenceId(JinaInferenceId));
+}
+
+///
+/// Builds analysis settings at runtime (includes synonyms that are loaded from configuration).
+///
+public static class DocumentationAnalysisFactory
+{
+ public static AnalysisBuilder BuildAnalysis(AnalysisBuilder analysis, string synonymSetName, string[] indexTimeSynonyms) => analysis
+ .Normalizer("keyword_normalizer", n => n.Custom()
+ .CharFilter("strip_non_word_chars")
+ .Filters("lowercase", "asciifolding", "trim"))
+ .Analyzer("starts_with_analyzer", a => a.Custom()
+ .Tokenizer("starts_with_tokenizer")
+ .Filter("lowercase"))
+ .Analyzer("starts_with_analyzer_search", a => a.Custom()
+ .Tokenizer("keyword")
+ .Filter("lowercase"))
+ .Analyzer("synonyms_fixed_analyzer", a => a.Custom()
+ .Tokenizer("group_tokenizer")
+ .Filters("lowercase", "synonyms_fixed_filter", "kstem"))
+ .Analyzer("synonyms_analyzer", a => a.Custom()
+ .Tokenizer("group_tokenizer")
+ .Filters("lowercase", "synonyms_filter", "kstem"))
+ .Analyzer("highlight_analyzer", a => a.Custom()
+ .Tokenizer("group_tokenizer")
+ .Filters("lowercase", "english_stop"))
+ .Analyzer("hierarchy_analyzer", a => a.Custom()
+ .Tokenizer("path_tokenizer"))
+ .CharFilter("strip_non_word_chars", cf => cf.PatternReplace()
+ .Pattern(@"\W")
+ .Replacement(" "))
+ .TokenFilter("synonyms_fixed_filter", tf => tf.SynonymGraph()
+ .Synonyms(indexTimeSynonyms))
+ .TokenFilter("synonyms_filter", tf => tf.SynonymGraph()
+ .SynonymsSet(synonymSetName)
+ .Updateable(true))
+ .TokenFilter("english_stop", tf => tf.Stop()
+ .Stopwords("_english_"))
+ .Tokenizer("starts_with_tokenizer", t => t.EdgeNGram()
+ .MinGram(1)
+ .MaxGram(10)
+ .TokenChars("letter", "digit", "symbol", "whitespace"))
+ .Tokenizer("group_tokenizer", t => t.CharGroup()
+ .TokenizeOnChars("whitespace", ",", ";", "?", "!", "(", ")", "&", "'", "\"", "/", "[", "]", "{", "}"))
+ .Tokenizer("path_tokenizer", t => t.PathHierarchy()
+ .Delimiter('/'));
+}
diff --git a/src/Elastic.Documentation/Search/IndexedProduct.cs b/src/Elastic.Documentation/Search/IndexedProduct.cs
index ee766fac1..cdb8925e8 100644
--- a/src/Elastic.Documentation/Search/IndexedProduct.cs
+++ b/src/Elastic.Documentation/Search/IndexedProduct.cs
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information
using System.Text.Json.Serialization;
+using Elastic.Mapping;
namespace Elastic.Documentation.Search;
@@ -15,12 +16,14 @@ public record IndexedProduct
///
/// The product ID from products.yml (e.g., "elasticsearch", "kibana", "apm-agent-java")
///
+ [Keyword(Normalizer = "keyword_normalizer")]
[JsonPropertyName("id")]
public string? Id { get; init; }
///
/// The repository name (e.g., "elasticsearch", "docs-content", "elastic-otel-java")
///
+ [Keyword(Normalizer = "keyword_normalizer")]
[JsonPropertyName("repository")]
public string? Repository { get; init; }
}
diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.Mapping.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.Mapping.cs
deleted file mode 100644
index 4e36f7a56..000000000
--- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.Mapping.cs
+++ /dev/null
@@ -1,260 +0,0 @@
-// Licensed to Elasticsearch B.V under one or more agreements.
-// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
-// See the LICENSE file in the project root for more information
-
-using Elastic.Documentation.Search;
-using Elastic.Ingest.Elasticsearch.Catalog;
-
-namespace Elastic.Markdown.Exporters.Elasticsearch;
-
-public abstract partial class ElasticsearchIngestChannel
- where TChannelOptions : CatalogIndexChannelOptionsBase
- where TChannel : CatalogIndexChannel
-{
- protected static string CreateMappingSetting(string synonymSetName, string[] synonyms, string? defaultPipeline = null)
- {
- var indexTimeSynonyms = $"[{string.Join(",", synonyms.Select(r => $"\"{r}\""))}]";
- var pipelineSetting = defaultPipeline is not null ? $"\"default_pipeline\": \"{defaultPipeline}\"," : "";
- // language=json
- return
- $$$"""
- {
- {{{pipelineSetting}}}
- "analysis": {
- "normalizer": {
- "keyword_normalizer": {
- "type": "custom",
- "char_filter": ["strip_non_word_chars"],
- "filter": ["lowercase", "asciifolding", "trim"]
- }
- },
- "analyzer": {
- "starts_with_analyzer": {
- "tokenizer": "starts_with_tokenizer",
- "filter": [ "lowercase" ]
- },
- "starts_with_analyzer_search": {
- "tokenizer": "keyword",
- "filter": [ "lowercase" ]
- },
- "synonyms_fixed_analyzer": {
- "tokenizer": "group_tokenizer",
- "filter": [
- "lowercase",
- "synonyms_fixed_filter",
- "kstem"
- ]
- },
- "synonyms_analyzer": {
- "tokenizer": "group_tokenizer",
- "filter": [
- "lowercase",
- "synonyms_filter",
- "kstem"
- ]
- },
- "highlight_analyzer": {
- "tokenizer": "group_tokenizer",
- "filter": [
- "lowercase",
- "english_stop"
- ]
- },
- "hierarchy_analyzer": { "tokenizer": "path_tokenizer" }
- },
- "char_filter": {
- "strip_non_word_chars": {
- "type": "pattern_replace",
- "pattern": "\\W",
- "replacement": " "
- }
- },
- "filter": {
- "synonyms_fixed_filter": {
- "type": "synonym_graph",
- "synonyms": {{{indexTimeSynonyms}}}
- },
- "synonyms_filter": {
- "type": "synonym_graph",
- "synonyms_set": "{{{synonymSetName}}}",
- "updateable": true
- },
- "english_stop": {
- "type": "stop",
- "stopwords": "_english_"
- }
- },
- "tokenizer": {
- "starts_with_tokenizer": {
- "type": "edge_ngram",
- "min_gram": 1,
- "max_gram": 10,
- "token_chars": [
- "letter",
- "digit",
- "symbol",
- "whitespace"
- ]
- },
- "group_tokenizer": {
- "type": "char_group",
- "tokenize_on_chars": [ "whitespace", ",", ";", "?", "!", "(", ")", "&", "'", "\"", "/", "[", "]", "{", "}" ]
- },
- "path_tokenizer": {
- "type": "path_hierarchy",
- "delimiter": "/"
- }
- }
- }
- }
- """;
- }
-
- // language=json
- protected static string CreateMapping(string? inferenceId) =>
- $$"""
- {
- "properties": {
- "type": { "type" : "keyword", "normalizer": "keyword_normalizer" },
- "product": {
- "type": "object",
- "properties": {
- "id": { "type": "keyword", "normalizer": "keyword_normalizer" },
- "repository": { "type": "keyword", "normalizer": "keyword_normalizer" }
- }
- },
- "related_products": {
- "type": "object",
- "properties": {
- "id": { "type": "keyword", "normalizer": "keyword_normalizer" },
- "repository": { "type": "keyword", "normalizer": "keyword_normalizer" }
- }
- },
- "url": {
- "type": "keyword",
- "fields": {
- "match": { "type": "text" },
- "prefix": { "type": "text", "analyzer" : "hierarchy_analyzer" }
- }
- },
- "navigation_depth" : { "type" : "rank_feature", "positive_score_impact": false },
- "navigation_table_of_contents" : { "type" : "rank_feature", "positive_score_impact": false },
- "navigation_section" : { "type" : "keyword", "normalizer": "keyword_normalizer" },
- "hidden" : {
- "type" : "boolean"
- },
- "applies_to" : {
- "type" : "nested",
- "properties" : {
- "type" : { "type" : "keyword", "normalizer": "keyword_normalizer" },
- "sub-type" : { "type" : "keyword", "normalizer": "keyword_normalizer" },
- "lifecycle" : { "type" : "keyword", "normalizer": "keyword_normalizer" },
- "version" : { "type" : "version" }
- }
- },
- "parents" : {
- "type" : "object",
- "properties" : {
- "url" : {
- "type": "keyword",
- "fields": {
- "match": { "type": "text" },
- "prefix": { "type": "text", "analyzer" : "hierarchy_analyzer" }
- }
- },
- "title": {
- "type": "text",
- "search_analyzer": "synonyms_analyzer",
- "fields": {
- "keyword": { "type": "keyword" }
- }
- }
- }
- },
- "hash" : { "type" : "keyword" },
- "enrichment_key" : { "type" : "keyword" },
- "search_title": {
- "type": "text",
- "analyzer": "synonyms_fixed_analyzer",
- "search_analyzer": "synonyms_analyzer",
- "fields": {
- "completion": {
- "type": "search_as_you_type",
- "analyzer": "synonyms_fixed_analyzer",
- "search_analyzer": "synonyms_analyzer",
- "term_vector": "with_positions_offsets",
- "index_options": "offsets"
- }
- }
- },
- "title": {
- "type": "text",
- "search_analyzer": "synonyms_analyzer",
- "fields": {
- "keyword": { "type": "keyword", "normalizer": "keyword_normalizer" },
- "starts_with": { "type": "text", "analyzer": "starts_with_analyzer", "search_analyzer": "starts_with_analyzer_search" },
- "completion": { "type": "search_as_you_type", "search_analyzer": "synonyms_analyzer" }
- {{(!string.IsNullOrWhiteSpace(inferenceId) ? $$""", "semantic_text": {{{InferenceMapping(inferenceId)}}}""" : "")}}
- }
- },
- "body": {
- "type": "text"
- },
- "stripped_body": {
- "type": "text",
- "analyzer": "synonyms_fixed_analyzer",
- "search_analyzer": "synonyms_analyzer",
- "term_vector": "with_positions_offsets"
- },
- "headings": {
- "type": "text",
- "analyzer": "synonyms_fixed_analyzer",
- "search_analyzer": "synonyms_analyzer"
- },
- "abstract": {
- "type" : "text",
- "analyzer": "synonyms_fixed_analyzer",
- "search_analyzer": "synonyms_analyzer",
- "fields" : {
- {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}}
- }
- },
- "ai_rag_optimized_summary": {
- "type": "text",
- "analyzer": "synonyms_fixed_analyzer",
- "search_analyzer": "synonyms_analyzer",
- "fields": {
- {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}}
- }
- },
- "ai_short_summary": {
- "type": "text"
- },
- "ai_search_query": {
- "type": "keyword"
- },
- "ai_questions": {
- "type": "text",
- "fields": {
- {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}}
- }
- },
- "ai_use_cases": {
- "type": "text",
- "fields": {
- {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}}
- }
- },
- "enrichment_prompt_hash": {
- "type": "keyword"
- }
- }
- }
- """;
-
- private static string InferenceMapping(string inferenceId) =>
- $"""
- "type": "semantic_text",
- "inference_id": "{inferenceId}"
- """;
-}
diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.cs
deleted file mode 100644
index 6ff857956..000000000
--- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchIngestChannel.cs
+++ /dev/null
@@ -1,161 +0,0 @@
-// Licensed to Elasticsearch B.V under one or more agreements.
-// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
-// See the LICENSE file in the project root for more information
-
-using Elastic.Channels;
-using Elastic.Documentation.Configuration;
-using Elastic.Documentation.Diagnostics;
-using Elastic.Documentation.Search;
-using Elastic.Documentation.Serialization;
-using Elastic.Ingest.Elasticsearch.Catalog;
-using Elastic.Ingest.Elasticsearch.Indices;
-using Elastic.Ingest.Elasticsearch.Semantic;
-using Elastic.Transport;
-using Microsoft.Extensions.Logging;
-
-namespace Elastic.Markdown.Exporters.Elasticsearch;
-
-public class ElasticsearchLexicalIngestChannel(
- ILoggerFactory logFactory,
- IDiagnosticsCollector collector,
- ElasticsearchEndpoint endpoint,
- string indexNamespace,
- DistributedTransport transport,
- string[] indexTimeSynonyms,
- string? defaultPipeline = null
-)
- : ElasticsearchIngestChannel, CatalogIndexChannel>
- (logFactory, collector, endpoint, transport, o => new(o), t => new(t)
- {
- BulkOperationIdLookup = d => d.Url,
- // hash, last_updated and batch_index_date are all set before the docs are written to the channel
- ScriptedHashBulkUpsertLookup = (d, _) => new HashedBulkUpdate("hash", d.Hash, "ctx._source.batch_index_date = params.batch_index_date",
- new Dictionary
- {
- { "batch_index_date", d.BatchIndexDate.ToString("o") }
- }),
- GetMapping = () => CreateMapping(null),
- GetMappingSettings = () => CreateMappingSetting($"docs-{indexNamespace}", indexTimeSynonyms, defaultPipeline),
- IndexFormat =
- $"{endpoint.IndexNamePrefix.Replace("semantic", "lexical").ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}",
- ActiveSearchAlias = $"{endpoint.IndexNamePrefix.Replace("semantic", "lexical").ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}"
- });
-
-public class ElasticsearchSemanticIngestChannel(
- ILoggerFactory logFactory,
- IDiagnosticsCollector collector,
- ElasticsearchEndpoint endpoint,
- string indexNamespace,
- DistributedTransport transport,
- string[] indexTimeSynonyms,
- string? defaultPipeline = null
-)
- : ElasticsearchIngestChannel, SemanticIndexChannel>
- (logFactory, collector, endpoint, transport, o => new(o), t => new(t)
- {
- BulkOperationIdLookup = d => d.Url,
- GetMapping = (inferenceId, _) => CreateMapping(inferenceId),
- GetMappingSettings = (_, _) => CreateMappingSetting($"docs-{indexNamespace}", indexTimeSynonyms, defaultPipeline),
- IndexFormat = $"{endpoint.IndexNamePrefix.ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}",
- ActiveSearchAlias = $"{endpoint.IndexNamePrefix}-{indexNamespace.ToLowerInvariant()}",
- IndexNumThreads = endpoint.IndexNumThreads,
- SearchNumThreads = endpoint.SearchNumThreads,
- InferenceCreateTimeout = TimeSpan.FromMinutes(endpoint.BootstrapTimeout ?? 4),
- UsePreexistingInferenceIds = !endpoint.NoElasticInferenceService,
- InferenceId = endpoint.NoElasticInferenceService ? null : ".elser-2-elastic",
- SearchInferenceId = endpoint.NoElasticInferenceService ? null : ".elser-2-elastic"
- });
-
-public abstract partial class ElasticsearchIngestChannel : IDisposable
- where TChannelOptions : CatalogIndexChannelOptionsBase
- where TChannel : CatalogIndexChannel
-{
- private readonly IDiagnosticsCollector _collector;
- public TChannel Channel { get; }
- private readonly ILogger _logger;
-
- protected ElasticsearchIngestChannel(
- ILoggerFactory logFactory,
- IDiagnosticsCollector collector,
- ElasticsearchEndpoint endpoint,
- DistributedTransport transport,
- Func createChannel,
- Func createOptions
- )
- {
- _collector = collector;
- _logger = logFactory.CreateLogger>();
- //The max num threads per allocated node, from testing its best to limit our max concurrency
- //producing to this number as well
- var options = createOptions(transport);
- var i = 0;
- options.BufferOptions = new BufferOptions
- {
- OutboundBufferMaxSize = endpoint.BufferSize,
- ExportMaxConcurrency = endpoint.IndexNumThreads,
- ExportMaxRetries = endpoint.MaxRetries
- };
- options.SerializerContext = SourceGenerationContext.Default;
- options.ExportBufferCallback = () =>
- {
- var count = Interlocked.Increment(ref i);
- _logger.LogInformation("Exported {Count} documents to Elasticsearch index {IndexName}",
- count * endpoint.BufferSize, Channel?.IndexName ?? string.Format(options.IndexFormat, "latest"));
- };
- options.ExportExceptionCallback = e =>
- {
- _logger.LogError(e, "Failed to export document");
- _collector.EmitGlobalError("Elasticsearch export: failed to export document", e);
- };
- options.ServerRejectionCallback = items =>
- {
- foreach (var (doc, responseItem) in items)
- {
- _collector.EmitGlobalError(
- $"Server rejection: {responseItem.Status} {responseItem.Error?.Type} {responseItem.Error?.Reason} for document {doc.Url}");
- }
- };
- Channel = createChannel(options);
- _logger.LogInformation("Created {Channel} Elasticsearch target for indexing", typeof(TChannel).Name);
- }
-
- public async ValueTask StopAsync(Cancel ctx = default)
- {
- _logger.LogInformation("Waiting to drain all inflight exports to Elasticsearch");
- var drained = await Channel.WaitForDrainAsync(null, ctx);
- if (!drained)
- _collector.EmitGlobalError("Elasticsearch export: failed to complete indexing in a timely fashion while shutting down");
-
- _logger.LogInformation("Refreshing target index {Index}", Channel.IndexName);
- var refreshed = await Channel.RefreshAsync(ctx);
- if (!refreshed)
- _collector.EmitGlobalError($"Refreshing target index {Channel.IndexName} did not complete successfully");
-
- _logger.LogInformation("Applying aliases to {Index}", Channel.IndexName);
- var swapped = await Channel.ApplyAliasesAsync(ctx);
- if (!swapped)
- _collector.EmitGlobalError($"${nameof(ElasticsearchMarkdownExporter)} failed to apply aliases to index {Channel.IndexName}");
-
- return drained && refreshed && swapped;
- }
-
- public async ValueTask RefreshAsync(Cancel ctx = default) => await Channel.RefreshAsync(ctx);
-
- public async ValueTask TryWrite(DocumentationDocument document, Cancel ctx = default)
- {
- if (Channel.TryWrite(document))
- return true;
-
- if (await Channel.WaitToWriteAsync(ctx))
- return Channel.TryWrite(document);
- return false;
- }
-
- public void Dispose()
- {
- Channel.Complete();
- Channel.Dispose();
-
- GC.SuppressFinalize(this);
- }
-}
diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs
index 00f4d65a0..e69dfc9e5 100644
--- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs
+++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs
@@ -27,8 +27,8 @@ public partial class ElasticsearchMarkdownExporter
///
private void AssignDocumentMetadata(DocumentationDocument doc)
{
- var semanticHash = _semanticChannel.Channel.ChannelHash;
- var lexicalHash = _lexicalChannel.Channel.ChannelHash;
+ var semanticHash = _semanticTypeContext?.Hash ?? string.Empty;
+ var lexicalHash = _lexicalTypeContext.Hash;
var hash = HashedBulkUpdate.CreateHash(semanticHash, lexicalHash,
doc.Url, doc.Type, doc.StrippedBody ?? string.Empty, string.Join(",", doc.Headings.OrderBy(h => h)),
doc.SearchTitle ?? string.Empty,
@@ -37,8 +37,6 @@ private void AssignDocumentMetadata(DocumentationDocument doc)
_fixedSynonymsHash
);
doc.Hash = hash;
- doc.LastUpdated = _batchIndexDate;
- doc.BatchIndexDate = _batchIndexDate;
}
private static void CommonEnrichments(DocumentationDocument doc, INavigationItem? navigationItem)
@@ -165,9 +163,7 @@ public async ValueTask ExportAsync(MarkdownExportFileContext fileContext,
AssignDocumentMetadata(doc);
- if (_indexStrategy == IngestStrategy.Multiplex)
- return await _lexicalChannel.TryWrite(doc, ctx) && await _semanticChannel.TryWrite(doc, ctx);
- return await _lexicalChannel.TryWrite(doc, ctx);
+ return await WriteDocumentAsync(doc, ctx);
}
///
@@ -209,22 +205,10 @@ public async ValueTask FinishExportAsync(IDirectoryInfo outputFolder, Canc
AssignDocumentMetadata(doc);
- // Write to channels following the multiplex or reindex strategy
- if (_indexStrategy == IngestStrategy.Multiplex)
+ if (!await WriteDocumentAsync(doc, ctx))
{
- if (!await _lexicalChannel.TryWrite(doc, ctx) || !await _semanticChannel.TryWrite(doc, ctx))
- {
- _logger.LogError("Failed to write OpenAPI document {Url}", doc.Url);
- return false;
- }
- }
- else
- {
- if (!await _lexicalChannel.TryWrite(doc, ctx))
- {
- _logger.LogError("Failed to write OpenAPI document {Url}", doc.Url);
- return false;
- }
+ _logger.LogError("Failed to write OpenAPI document {Url}", doc.Url);
+ return false;
}
}
diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs
index 5220bfe39..6a6a510dd 100644
--- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs
+++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs
@@ -4,38 +4,37 @@
using System.Text.Json;
using System.Text.Json.Serialization;
+using Elastic.Channels;
using Elastic.Documentation.Configuration;
using Elastic.Documentation.Configuration.Search;
using Elastic.Documentation.Configuration.Versions;
using Elastic.Documentation.Diagnostics;
+using Elastic.Documentation.Search;
+using Elastic.Documentation.Serialization;
using Elastic.Ingest.Elasticsearch;
using Elastic.Ingest.Elasticsearch.Indices;
+using Elastic.Mapping;
using Elastic.Markdown.Exporters.Elasticsearch.Enrichment;
using Elastic.Transport;
using Microsoft.Extensions.Logging;
-using NetEscapades.EnumGenerators;
namespace Elastic.Markdown.Exporters.Elasticsearch;
-[EnumExtensions]
-public enum IngestStrategy { Reindex, Multiplex }
-
public partial class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposable
{
private readonly IDiagnosticsCollector _collector;
private readonly IDocumentationConfigurationContext _context;
private readonly ILogger _logger;
- private readonly ElasticsearchLexicalIngestChannel _lexicalChannel;
- private readonly ElasticsearchSemanticIngestChannel _semanticChannel;
-
private readonly ElasticsearchEndpoint _endpoint;
-
- private readonly DateTimeOffset _batchIndexDate = DateTimeOffset.UtcNow;
private readonly DistributedTransport _transport;
- private IngestStrategy _indexStrategy;
- private readonly string _indexNamespace;
- private string _currentLexicalHash = string.Empty;
- private string _currentSemanticHash = string.Empty;
+ private readonly string _buildType;
+
+ // Ingest: orchestrator for dual-index mode
+ private readonly IncrementalSyncOrchestrator _orchestrator;
+
+ // Type context hashes for document content hash computation
+ private readonly ElasticsearchTypeContext _lexicalTypeContext;
+ private readonly ElasticsearchTypeContext _semanticTypeContext;
private readonly IReadOnlyDictionary _synonyms;
private readonly IReadOnlyCollection _rules;
@@ -57,7 +56,7 @@ public ElasticsearchMarkdownExporter(
ILoggerFactory logFactory,
IDiagnosticsCollector collector,
DocumentationEndpoints endpoints,
- string indexNamespace,
+ string buildType,
IDocumentationConfigurationContext context
)
{
@@ -65,14 +64,14 @@ IDocumentationConfigurationContext context
_context = context;
_logger = logFactory.CreateLogger();
_endpoint = endpoints.Elasticsearch;
- _indexStrategy = IngestStrategy.Reindex;
- _indexNamespace = indexNamespace;
+ _buildType = buildType;
_versionsConfiguration = context.VersionsConfiguration;
_synonyms = context.SearchConfiguration.Synonyms;
_rules = context.SearchConfiguration.Rules;
var es = endpoints.Elasticsearch;
_transport = ElasticsearchTransportFactory.Create(es);
+ _operations = new ElasticsearchOperations(_transport, _logger, collector);
string[] fixedSynonyms = ["esql", "data-stream", "data-streams", "machine-learning"];
var indexTimeSynonyms = _synonyms.Aggregate(new List(), (acc, synonym) =>
@@ -83,15 +82,18 @@ IDocumentationConfigurationContext context
}).Where(r => fixedSynonyms.Contains(r.Id)).Select(r => r.Synonyms).ToArray();
_fixedSynonymsHash = HashedBulkUpdate.CreateHash(string.Join(",", indexTimeSynonyms));
- // Use AI enrichment pipeline if enabled - hybrid approach:
- // - Cache hits: enrich processor applies fields at index time
- // - Cache misses: apply fields inline before indexing
var aiPipeline = es.EnableAiEnrichment ? EnrichPolicyManager.PipelineName : null;
- _lexicalChannel = new ElasticsearchLexicalIngestChannel(logFactory, collector, es, indexNamespace, _transport, indexTimeSynonyms, aiPipeline);
- _semanticChannel = new ElasticsearchSemanticIngestChannel(logFactory, collector, es, indexNamespace, _transport, indexTimeSynonyms, aiPipeline);
+ var synonymSetName = $"docs-{buildType}";
- // Initialize shared ES operations
- _operations = new ElasticsearchOperations(_transport, _logger, collector);
+ var pipelineSettings = aiPipeline is not null
+ ? new Dictionary { ["index.default_pipeline"] = aiPipeline }
+ : null;
+
+ _lexicalTypeContext = DocumentationMappingContext.DocumentationDocument.CreateContext(type: buildType) with
+ {
+ ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, synonymSetName, indexTimeSynonyms),
+ IndexSettings = pipelineSettings
+ };
// Initialize AI enrichment services if enabled
if (es.EnableAiEnrichment)
@@ -100,81 +102,136 @@ IDocumentationConfigurationContext context
_llmClient = new ElasticsearchLlmClient(_transport, logFactory.CreateLogger(), _operations);
_enrichPolicyManager = new EnrichPolicyManager(_transport, logFactory.CreateLogger(), _enrichmentCache.IndexName);
}
+
+ _semanticTypeContext = DocumentationMappingContext.DocumentationDocumentSemantic.CreateContext(type: buildType) with
+ {
+ ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, synonymSetName, indexTimeSynonyms),
+ IndexSettings = pipelineSettings
+ };
+
+ var resolver = DocumentationMappingContext.DocumentationDocument;
+ _orchestrator = new IncrementalSyncOrchestrator(
+ _transport, _lexicalTypeContext, _semanticTypeContext,
+ setBatchIndexDate: resolver.SetBatchIndexDate,
+ setLastUpdated: resolver.SetLastUpdated)
+ {
+ ConfigurePrimary = ConfigureChannelOptions,
+ ConfigureSecondary = ConfigureChannelOptions,
+ OnPostComplete = es.EnableAiEnrichment
+ ? async (ctx, _, ct) => await PostCompleteAsync(ctx, ct)
+ : null
+ };
+ _ = _orchestrator.AddPreBootstrapTask(async (_, ct) =>
+ {
+ await InitializeEnrichmentAsync(ct);
+ await PublishSynonymsAsync(ct);
+ await PublishQueryRulesAsync(ct);
+ });
+ }
+
+ private void ConfigureChannelOptions(IngestChannelOptions options)
+ {
+ options.BufferOptions = new BufferOptions
+ {
+ OutboundBufferMaxSize = _endpoint.BufferSize,
+ ExportMaxConcurrency = _endpoint.IndexNumThreads,
+ ExportMaxRetries = _endpoint.MaxRetries
+ };
+ options.SerializerContext = SourceGenerationContext.Default;
+ options.ExportExceptionCallback = e =>
+ {
+ _logger.LogError(e, "Failed to export document");
+ _collector.EmitGlobalError("Elasticsearch export: failed to export document", e);
+ };
+ options.ServerRejectionCallback = items =>
+ {
+ foreach (var (doc, responseItem) in items)
+ {
+ _collector.EmitGlobalError(
+ $"Server rejection: {responseItem.Status} {responseItem.Error?.Type} {responseItem.Error?.Reason} for document {doc.Url}");
+ }
+ };
}
///
public async ValueTask StartAsync(Cancel ctx = default)
{
- // Initialize AI enrichment cache (pre-loads existing hashes into memory)
- if (_enrichmentCache is not null && _enrichPolicyManager is not null)
- {
- _logger.LogInformation("Initializing AI enrichment cache...");
- await _enrichmentCache.InitializeAsync(ctx);
- _logger.LogInformation("AI enrichment cache ready with {Count} existing entries", _enrichmentCache.Count);
-
- // The enrich pipeline must exist before indexing (used as default_pipeline).
- // The pipeline's enrich processor requires the .enrich-* index to exist,
- // which is created by executing the policy. We execute even with an empty
- // cache index - it just creates an empty enrich index that returns no matches.
- _logger.LogInformation("Setting up enrich policy and pipeline...");
- await _enrichPolicyManager.ExecutePolicyAsync(ctx);
- await _enrichPolicyManager.EnsurePipelineExistsAsync(ctx);
- }
+ _ = await _orchestrator.StartAsync(BootstrapMethod.Failure, ctx);
+ _logger.LogInformation("Orchestrator started with {Strategy} strategy", _orchestrator.Strategy);
+ }
- _currentLexicalHash = await _lexicalChannel.Channel.GetIndexTemplateHashAsync(ctx) ?? string.Empty;
- _currentSemanticHash = await _semanticChannel.Channel.GetIndexTemplateHashAsync(ctx) ?? string.Empty;
+ ///
+ public async ValueTask StopAsync(Cancel ctx = default) =>
+ _ = await _orchestrator.CompleteAsync(null, ctx);
- await PublishSynonymsAsync(ctx);
- await PublishQueryRulesAsync(ctx);
- _ = await _lexicalChannel.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx);
+ private async Task InitializeEnrichmentAsync(Cancel ctx)
+ {
+ if (_enrichmentCache is null || _enrichPolicyManager is null)
+ return;
- // if the previous hash does not match the current hash, we know already we want to multiplex to a new index
- if (_currentLexicalHash != _lexicalChannel.Channel.ChannelHash)
- _indexStrategy = IngestStrategy.Multiplex;
+ _logger.LogInformation("Initializing AI enrichment cache...");
+ await _enrichmentCache.InitializeAsync(ctx);
+ _logger.LogInformation("AI enrichment cache ready with {Count} existing entries", _enrichmentCache.Count);
- if (!_endpoint.NoSemantic)
- {
- var semanticWriteAlias = string.Format(_semanticChannel.Channel.Options.IndexFormat, "latest");
- var semanticIndexAvailable = await _transport.HeadAsync(semanticWriteAlias, ctx);
- if (!semanticIndexAvailable.ApiCallDetails.HasSuccessfulStatusCode && _endpoint is { ForceReindex: false, NoSemantic: false })
- {
- _indexStrategy = IngestStrategy.Multiplex;
- _logger.LogInformation("Index strategy set to multiplex because {SemanticIndex} does not exist, pass --force-reindex to always use reindex", semanticWriteAlias);
- }
+ _logger.LogInformation("Setting up enrich policy and pipeline...");
+ await _enrichPolicyManager.ExecutePolicyAsync(ctx);
+ await _enrichPolicyManager.EnsurePipelineExistsAsync(ctx);
+ }
- //try re-use index if we are re-indexing. Multiplex should always go to a new index
- _semanticChannel.Channel.Options.TryReuseIndex = _indexStrategy == IngestStrategy.Reindex;
- _ = await _semanticChannel.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx);
- }
+ private async Task PostCompleteAsync(OrchestratorContext context, Cancel ctx) =>
+ await ExecuteEnrichPolicyIfNeededAsync(context.SecondaryWriteAlias, ctx);
- var lexicalIndexExists = await IndexExists(_lexicalChannel.Channel.IndexName) ? "existing" : "new";
- var semanticIndexExists = await IndexExists(_semanticChannel.Channel.IndexName) ? "existing" : "new";
- if (_currentLexicalHash != _lexicalChannel.Channel.ChannelHash)
- {
- _indexStrategy = IngestStrategy.Multiplex;
- _logger.LogInformation("Multiplexing lexical new index: '{Index}' since current hash on server '{HashCurrent}' does not match new '{HashNew}'",
- _lexicalChannel.Channel.IndexName, _currentLexicalHash, _lexicalChannel.Channel.ChannelHash);
- }
- else
- _logger.LogInformation("Targeting {State} lexical: '{Index}'", lexicalIndexExists, _lexicalChannel.Channel.IndexName);
+ private async ValueTask ExecuteEnrichPolicyIfNeededAsync(string? semanticAlias, Cancel ctx)
+ {
+ if (_enrichmentCache is null || _enrichPolicyManager is null)
+ return;
- if (!_endpoint.NoSemantic && _currentSemanticHash != _semanticChannel.Channel.ChannelHash)
+ _logger.LogInformation(
+ "AI enrichment complete: {CacheHits} cache hits, {Enrichments} enrichments generated (limit: {Limit})",
+ _cacheHitCount, _enrichmentCount, _enrichmentOptions.MaxNewEnrichmentsPerRun);
+
+ if (_enrichmentCache.Count > 0)
{
- _indexStrategy = IngestStrategy.Multiplex;
- _logger.LogInformation("Multiplexing new index '{Index}' since current hash on server '{HashCurrent}' does not match new '{HashNew}'",
- _semanticChannel.Channel.IndexName, _currentSemanticHash, _semanticChannel.Channel.ChannelHash);
+ _logger.LogInformation("Executing enrich policy to update internal index with {Count} total entries...", _enrichmentCache.Count);
+ await _enrichPolicyManager.ExecutePolicyAsync(ctx);
+
+ if (semanticAlias is not null)
+ await BackfillMissingAiFieldsAsync(semanticAlias, ctx);
}
- else if (!_endpoint.NoSemantic)
- _logger.LogInformation("Targeting {State} semantical: '{Index}'", semanticIndexExists, _semanticChannel.Channel.IndexName);
+ }
- _logger.LogInformation("Using {IndexStrategy} to sync lexical index to semantic index", _indexStrategy.ToStringFast(true));
+ private async ValueTask BackfillMissingAiFieldsAsync(string semanticAlias, Cancel ctx)
+ {
+ if (_enrichmentCache is null || _llmClient is null)
+ return;
+
+ var currentPromptHash = ElasticsearchLlmClient.PromptHash;
+
+ _logger.LogInformation(
+ "Starting AI backfill for documents missing or stale AI fields (cache has {CacheCount} entries, prompt hash: {PromptHash})",
+ _enrichmentCache.Count, currentPromptHash[..8]);
- async ValueTask IndexExists(string name) => (await _transport.HeadAsync(name, ctx)).ApiCallDetails.HasSuccessfulStatusCode;
+ var query = $$"""
+ {
+ "query": {
+ "bool": {
+ "must": { "exists": { "field": "enrichment_key" } },
+ "should": [
+ { "bool": { "must_not": { "exists": { "field": "ai_questions" } } } },
+ { "bool": { "must_not": { "term": { "enrichment_prompt_hash": "{{currentPromptHash}}" } } } }
+ ],
+ "minimum_should_match": 1
+ }
+ }
+ }
+ """;
+
+ await _operations.UpdateByQueryAsync(semanticAlias, PostData.String(query), EnrichPolicyManager.PipelineName, ctx);
}
private async Task PublishSynonymsAsync(Cancel ctx)
{
- var setName = $"docs-{_indexNamespace}";
+ var setName = $"docs-{_buildType}";
_logger.LogInformation("Publishing synonym set '{SetName}' to Elasticsearch", setName);
var synonymRules = _synonyms.Aggregate(new List(), (acc, synonym) =>
@@ -198,7 +255,8 @@ private async Task PutSynonyms(SynonymsSet synonymsSet, string setName, Cancel c
ctx);
if (!response.ApiCallDetails.HasSuccessfulStatusCode)
- _collector.EmitGlobalError($"Failed to publish synonym set '{setName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}");
+ _collector.EmitGlobalError(
+ $"Failed to publish synonym set '{setName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}");
else
_logger.LogInformation("Successfully published synonym set '{SetName}'.", setName);
}
@@ -211,7 +269,7 @@ private async Task PublishQueryRulesAsync(Cancel ctx)
return;
}
- var rulesetName = $"docs-ruleset-{_indexNamespace}";
+ var rulesetName = $"docs-ruleset-{_buildType}";
_logger.LogInformation("Publishing query ruleset '{RulesetName}' with {Count} rules to Elasticsearch", rulesetName, _rules.Count);
var rulesetRules = _rules.Select(r => new QueryRulesetRule
@@ -241,241 +299,24 @@ private async Task PutQueryRuleset(QueryRuleset ruleset, string rulesetName, Can
ctx);
if (!response.ApiCallDetails.HasSuccessfulStatusCode)
- _collector.EmitGlobalError($"Failed to publish query ruleset '{rulesetName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}");
+ _collector.EmitGlobalError(
+ $"Failed to publish query ruleset '{rulesetName}'. Reason: {response.ApiCallDetails.OriginalException?.Message ?? response.ToString()}");
else
_logger.LogInformation("Successfully published query ruleset '{RulesetName}'.", rulesetName);
}
- private async ValueTask CountAsync(string index, string body, Cancel ctx = default)
+ internal async ValueTask WriteDocumentAsync(DocumentationDocument doc, Cancel ctx)
{
- var countResponse = await _operations.WithRetryAsync(
- () => _transport.PostAsync($"/{index}/_count", PostData.String(body), ctx),
- $"POST {index}/_count",
- ctx);
- return countResponse.Body.Get("count");
+ if (_orchestrator.TryWrite(doc))
+ return true;
+ _ = await _orchestrator.WaitToWriteAsync(doc, ctx);
+ return true;
}
- ///
- public async ValueTask StopAsync(Cancel ctx = default)
- {
- var semanticWriteAlias = string.Format(_semanticChannel.Channel.Options.IndexFormat, "latest");
- var lexicalWriteAlias = string.Format(_lexicalChannel.Channel.Options.IndexFormat, "latest");
-
- var stopped = await _lexicalChannel.StopAsync(ctx);
- if (!stopped)
- throw new Exception($"Failed to stop {_lexicalChannel.GetType().Name}");
-
- await QueryIngestStatistics(lexicalWriteAlias, ctx);
-
- if (_indexStrategy == IngestStrategy.Multiplex)
- {
- if (!_endpoint.NoSemantic)
- _ = await _semanticChannel.StopAsync(ctx);
-
- // cleanup lexical index of old data
- await DoDeleteByQuery(lexicalWriteAlias, ctx);
- // need to refresh the lexical index to ensure that the delete by query is available
- _ = await _lexicalChannel.RefreshAsync(ctx);
- await QueryDocumentCounts(ctx);
- // ReSharper disable once ConvertIfStatementToConditionalTernaryExpression
- if (_endpoint.NoSemantic)
- _logger.LogInformation("Finish indexing {IndexStrategy} strategy", _indexStrategy.ToStringFast(true));
- else
- _logger.LogInformation("Finish syncing to semantic in {IndexStrategy} strategy", _indexStrategy.ToStringFast(true));
- return;
- }
-
- if (_endpoint.NoSemantic)
- {
- _logger.LogInformation("--no-semantic was specified so exiting early before reindexing to {Index}", lexicalWriteAlias);
- return;
- }
-
- var semanticIndex = _semanticChannel.Channel.IndexName;
- // check if the alias exists
- var semanticIndexHead = await _transport.HeadAsync(semanticWriteAlias, ctx);
- if (!semanticIndexHead.ApiCallDetails.HasSuccessfulStatusCode)
- {
- _logger.LogInformation("No semantic index exists yet, creating index {Index} for semantic search", semanticIndex);
- _ = await _semanticChannel.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx);
- var semanticIndexPut = await _transport.PutAsync(semanticIndex, PostData.String("{}"), ctx);
- if (!semanticIndexPut.ApiCallDetails.HasSuccessfulStatusCode)
- throw new Exception($"Failed to create index {semanticIndex}: {semanticIndexPut}");
- }
- var destinationIndex = _semanticChannel.Channel.IndexName;
-
- _logger.LogInformation("_reindex updates: '{SourceIndex}' => '{DestinationIndex}'", lexicalWriteAlias, destinationIndex);
- var request = PostData.String(@"
- {
- ""dest"": {
- ""index"": """ + destinationIndex + @"""
- },
- ""source"": {
- ""index"": """ + lexicalWriteAlias + @""",
- ""size"": 100,
- ""query"": {
- ""range"": {
- ""last_updated"": {
- ""gte"": """ + _batchIndexDate.ToString("o") + @"""
- }
- }
- }
- }
- }");
- await DoReindex(request, lexicalWriteAlias, destinationIndex, "updates", ctx);
-
- _logger.LogInformation("_reindex deletions: '{SourceIndex}' => '{DestinationIndex}'", lexicalWriteAlias, destinationIndex);
- request = PostData.String(@"
- {
- ""dest"": {
- ""index"": """ + destinationIndex + @"""
- },
- ""script"": {
- ""source"": ""ctx.op = \""delete\""""
- },
- ""source"": {
- ""index"": """ + lexicalWriteAlias + @""",
- ""size"": 100,
- ""query"": {
- ""range"": {
- ""batch_index_date"": {
- ""lt"": """ + _batchIndexDate.ToString("o") + @"""
- }
- }
- }
- }
- }");
- await DoReindex(request, lexicalWriteAlias, destinationIndex, "deletions", ctx);
-
- await DoDeleteByQuery(lexicalWriteAlias, ctx);
-
- _ = await _lexicalChannel.Channel.ApplyLatestAliasAsync(ctx);
- _ = await _semanticChannel.Channel.ApplyAliasesAsync(ctx);
-
- _ = await _lexicalChannel.RefreshAsync(ctx);
- _ = await _semanticChannel.RefreshAsync(ctx);
-
- _logger.LogInformation("Finish sync to semantic index using {IndexStrategy} strategy", _indexStrategy.ToStringFast(true));
- await QueryDocumentCounts(ctx);
-
- // Execute enrich policy so new cache entries are available for next run
- await ExecuteEnrichPolicyIfNeededAsync(ctx);
- }
-
- private async ValueTask ExecuteEnrichPolicyIfNeededAsync(Cancel ctx)
- {
- if (_enrichmentCache is null || _enrichPolicyManager is null)
- return;
-
- _logger.LogInformation(
- "AI enrichment complete: {CacheHits} cache hits, {Enrichments} enrichments generated (limit: {Limit})",
- _cacheHitCount, _enrichmentCount, _enrichmentOptions.MaxNewEnrichmentsPerRun);
-
- if (_enrichmentCache.Count > 0)
- {
- _logger.LogInformation("Executing enrich policy to update internal index with {Count} total entries...", _enrichmentCache.Count);
- await _enrichPolicyManager.ExecutePolicyAsync(ctx);
-
- // Backfill: Apply AI fields to documents that were skipped by hash-based upsert
- await BackfillMissingAiFieldsAsync(ctx);
- }
- }
-
- private async ValueTask BackfillMissingAiFieldsAsync(Cancel ctx)
- {
- // Why backfill is needed:
- // The exporter uses hash-based upsert - unchanged documents are skipped during indexing.
- // These skipped documents never pass through the ingest pipeline, so they miss AI fields.
- // This backfill runs _update_by_query with the AI pipeline to enrich those documents.
- //
- // Additionally, when prompts change, existing documents have stale AI fields.
- // We detect this by checking if the document's prompt_hash differs from the current one.
- //
- // Only backfill the semantic index - it's what the search API uses.
- // The lexical index is just an intermediate step for reindexing.
- if (_endpoint.NoSemantic || _enrichmentCache is null || _llmClient is null)
- return;
-
- var semanticAlias = _semanticChannel.Channel.Options.ActiveSearchAlias;
- var currentPromptHash = ElasticsearchLlmClient.PromptHash;
-
- _logger.LogInformation(
- "Starting AI backfill for documents missing or stale AI fields (cache has {CacheCount} entries, prompt hash: {PromptHash})",
- _enrichmentCache.Count, currentPromptHash[..8]);
-
- // Find documents with enrichment_key that either:
- // 1. Missing AI fields (never enriched), OR
- // 2. Have stale/missing enrichment_prompt_hash (enriched with old prompts)
- var query = $$"""
- {
- "query": {
- "bool": {
- "must": { "exists": { "field": "enrichment_key" } },
- "should": [
- { "bool": { "must_not": { "exists": { "field": "ai_questions" } } } },
- { "bool": { "must_not": { "term": { "enrichment_prompt_hash": "{{currentPromptHash}}" } } } }
- ],
- "minimum_should_match": 1
- }
- }
- }
- """;
-
- await RunBackfillQuery(semanticAlias, query, ctx);
- }
-
- private async ValueTask RunBackfillQuery(string indexAlias, string query, Cancel ctx) =>
- await _operations.UpdateByQueryAsync(indexAlias, PostData.String(query), EnrichPolicyManager.PipelineName, ctx);
-
- private async ValueTask QueryIngestStatistics(string lexicalWriteAlias, Cancel ctx)
- {
- var lexicalSearchAlias = _lexicalChannel.Channel.Options.ActiveSearchAlias;
- var updated = await CountAsync(lexicalSearchAlias, $$""" { "query": { "range": { "last_updated": { "gte": "{{_batchIndexDate:o}}" } } } }""", ctx);
- var total = await CountAsync(lexicalSearchAlias, $$""" { "query": { "range": { "batch_index_date": { "gte": "{{_batchIndexDate:o}}" } } } }""", ctx);
- var deleted = await CountAsync(lexicalSearchAlias, $$""" { "query": { "range": { "batch_index_date": { "lt": "{{_batchIndexDate:o}}" } } } }""", ctx);
-
- // TODO emit these as metrics
- _logger.LogInformation("Exported {Total}, Updated {Updated}, Deleted, {Deleted} documents to {LexicalIndex}", total, updated, deleted, lexicalWriteAlias);
- _logger.LogInformation("Syncing to semantic index using {IndexStrategy} strategy", _indexStrategy.ToStringFast(true));
- }
-
- private async ValueTask QueryDocumentCounts(Cancel ctx)
- {
- var semanticWriteAlias = string.Format(_semanticChannel.Channel.Options.IndexFormat, "latest");
- var lexicalWriteAlias = string.Format(_lexicalChannel.Channel.Options.IndexFormat, "latest");
- var totalLexical = await CountAsync(lexicalWriteAlias, "{}", ctx);
- var totalSemantic = await CountAsync(semanticWriteAlias, "{}", ctx);
-
- // TODO emit these as metrics
- _logger.LogInformation("Document counts -> Semantic Index: {TotalSemantic}, Lexical Index: {TotalLexical}", totalSemantic, totalLexical);
- }
-
- private async ValueTask DoDeleteByQuery(string lexicalWriteAlias, Cancel ctx)
- {
- // delete all documents with batch_index_date < _batchIndexDate
- // they weren't part of the current export
- _logger.LogInformation("Delete data in '{SourceIndex}' not part of batch date: {Date}", lexicalWriteAlias, _batchIndexDate.ToString("o"));
- var query = PostData.String(@"
- {
- ""query"": {
- ""range"": {
- ""batch_index_date"": {
- ""lt"": """ + _batchIndexDate.ToString("o") + @"""
- }
- }
- }
- }");
- await _operations.DeleteByQueryAsync(lexicalWriteAlias, query, ctx);
- }
-
- private async ValueTask DoReindex(PostData request, string lexicalWriteAlias, string semanticWriteAlias, string typeOfSync, Cancel ctx) =>
- await _operations.ReindexAsync(request, lexicalWriteAlias, semanticWriteAlias, typeOfSync, ctx);
-
///
public void Dispose()
{
- _lexicalChannel.Dispose();
- _semanticChannel.Dispose();
+ _orchestrator.Dispose();
_llmClient?.Dispose();
GC.SuppressFinalize(this);
}
diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs
index 4f94ae14a..3a3952406 100644
--- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs
+++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchOperations.cs
@@ -161,22 +161,6 @@ public async Task DeleteByQueryAsync(
await PollTaskUntilCompleteAsync(taskId, "_delete_by_query", index, null, ct);
}
- ///
- /// Executes a reindex operation and waits for completion.
- ///
- public async Task ReindexAsync(
- PostData request,
- string sourceIndex,
- string destIndex,
- string operationType,
- CancellationToken ct)
- {
- var url = "/_reindex?wait_for_completion=false&scroll=10m";
- var taskId = await PostAsyncTaskAsync(url, request, $"POST _reindex ({operationType})", ct);
- if (taskId is not null)
- await PollTaskUntilCompleteAsync(taskId, $"_reindex {operationType}", sourceIndex, destIndex, ct);
- }
-
///
/// Executes an update_by_query operation and waits for completion.
///
diff --git a/src/Elastic.Markdown/Exporters/ExporterExtensions.cs b/src/Elastic.Markdown/Exporters/ExporterExtensions.cs
index cec7388f3..6deb2a8c0 100644
--- a/src/Elastic.Markdown/Exporters/ExporterExtensions.cs
+++ b/src/Elastic.Markdown/Exporters/ExporterExtensions.cs
@@ -15,7 +15,7 @@ public static IReadOnlyCollection CreateMarkdownExporters(
this IReadOnlySet exportOptions,
ILoggerFactory logFactory,
IDocumentationConfigurationContext context,
- string indexNamespace
+ string buildType
)
{
var markdownExporters = new List(4);
@@ -24,7 +24,7 @@ string indexNamespace
if (exportOptions.Contains(Exporter.Configuration))
markdownExporters.Add(new ConfigurationExporter(logFactory, context.ConfigurationFileProvider, context));
if (exportOptions.Contains(Exporter.Elasticsearch))
- markdownExporters.Add(new ElasticsearchMarkdownExporter(logFactory, context.Collector, context.Endpoints, indexNamespace, context));
+ markdownExporters.Add(new ElasticsearchMarkdownExporter(logFactory, context.Collector, context.Endpoints, buildType, context));
return markdownExporters;
}
}
diff --git a/src/api/Elastic.Documentation.Api.App/Program.cs b/src/api/Elastic.Documentation.Api.App/Program.cs
index 2165dead6..ff255c280 100644
--- a/src/api/Elastic.Documentation.Api.App/Program.cs
+++ b/src/api/Elastic.Documentation.Api.App/Program.cs
@@ -4,6 +4,7 @@
using Elastic.Documentation.Api.Infrastructure;
using Elastic.Documentation.Api.Infrastructure.OpenTelemetry;
+using Elastic.Documentation.Configuration;
using Elastic.Documentation.Configuration.Assembler;
using Elastic.Documentation.Search;
using Elastic.Documentation.ServiceDefaults;
@@ -82,17 +83,22 @@ static void LogElasticsearchConfiguration(WebApplication app, ILogger logger)
{
try
{
- var esOptions = app.Services.GetService();
- if (esOptions != null)
+ var endpoints = app.Services.GetService();
+ if (endpoints is not null)
{
+ var endpoint = endpoints.Elasticsearch;
+ var searchIndex = DocumentationMappingContext.DocumentationDocumentSemantic
+ .CreateContext(type: "assembler")
+ .ResolveReadTarget();
logger.LogInformation(
- "Elasticsearch configuration - Url: {Url}, Index: {Index}",
- esOptions.Url,
- esOptions.IndexName
+ "Elasticsearch configuration - Url: {Url}, Namespace: {Namespace}, SearchIndex: {SearchIndex}",
+ endpoint.Uri,
+ endpoints.Namespace,
+ searchIndex
);
}
else
- logger.LogWarning("ElasticsearchOptions could not be resolved from DI");
+ logger.LogWarning("DocumentationEndpoints could not be resolved from DI");
}
catch (Exception ex)
{
diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs
index 345d07ba5..99e20e5ec 100644
--- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs
+++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/ElasticsearchAskAiMessageFeedbackGateway.cs
@@ -7,7 +7,7 @@
using Elastic.Clients.Elasticsearch.Serialization;
using Elastic.Documentation.Api.Core;
using Elastic.Documentation.Api.Core.AskAi;
-using Elastic.Documentation.Search;
+using Elastic.Documentation.Configuration;
using Elastic.Transport;
using Microsoft.Extensions.Logging;
@@ -25,20 +25,27 @@ public sealed class ElasticsearchAskAiMessageFeedbackGateway : IAskAiMessageFeed
private bool _disposed;
public ElasticsearchAskAiMessageFeedbackGateway(
- ElasticsearchOptions elasticsearchOptions,
+ DocumentationEndpoints endpoints,
AppEnvironment appEnvironment,
ILogger logger)
{
_logger = logger;
_indexName = $"ask-ai-message-feedback-{appEnvironment.Current.ToStringFast(true)}";
- _nodePool = new SingleNodePool(new Uri(elasticsearchOptions.Url.Trim()));
+ var endpoint = endpoints.Elasticsearch;
+ _nodePool = new SingleNodePool(endpoint.Uri);
+ var auth = endpoint.ApiKey is { } apiKey
+ ? (AuthorizationHeader)new ApiKey(apiKey)
+ : endpoint is { Username: { } username, Password: { } password }
+ ? new BasicAuthentication(username, password)
+ : null!;
+
using var clientSettings = new ElasticsearchClientSettings(
_nodePool,
sourceSerializer: (_, settings) => new DefaultSourceSerializer(settings, MessageFeedbackJsonContext.Default)
)
.DefaultIndex(_indexName)
- .Authentication(new ApiKey(elasticsearchOptions.ApiKey));
+ .Authentication(auth);
_client = new ElasticsearchClient(clientSettings);
}
diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs
index 04db74d4d..8ec6d6d18 100644
--- a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs
+++ b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs
@@ -58,10 +58,6 @@ public async Task GetParam(string name, bool withDecryption = true, Canc
{
return GetEnv("DOCUMENTATION_KIBANA_APIKEY");
}
- case "docs-elasticsearch-index":
- {
- return GetEnv("DOCUMENTATION_ELASTIC_INDEX", "semantic-docs-dev-latest");
- }
default:
{
throw new ArgumentException($"Parameter '{name}' not found in {nameof(LocalParameterProvider)}");
diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs
index b5549d2c0..639367432 100644
--- a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs
+++ b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs
@@ -25,7 +25,7 @@ public class DocumentGateway(
{
var normalizedUrl = NormalizeUrl(url);
var response = await clientAccessor.Client.SearchAsync(s => s
- .Indices(clientAccessor.Options.IndexName)
+ .Indices(clientAccessor.SearchIndex)
.Query(q => q.Term(t => t.Field(f => f.Url).Value(normalizedUrl)))
.Size(1)
.Source(sf => sf.Filter(f => f.Includes(
@@ -104,7 +104,7 @@ public class DocumentGateway(
{
var normalizedUrl = NormalizeUrl(url);
var response = await clientAccessor.Client.SearchAsync(s => s
- .Indices(clientAccessor.Options.IndexName)
+ .Indices(clientAccessor.SearchIndex)
.Query(q => q.Term(t => t.Field(f => f.Url).Value(normalizedUrl)))
.Size(1)
.Source(sf => sf.Filter(f => f.Includes(
diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs
index 6b56eeda3..41615c7c1 100644
--- a/src/api/Elastic.Documentation.Mcp.Remote/Program.cs
+++ b/src/api/Elastic.Documentation.Mcp.Remote/Program.cs
@@ -5,6 +5,7 @@
using Elastic.Documentation.Api.Infrastructure.OpenTelemetry;
using Elastic.Documentation.Assembler.Links;
using Elastic.Documentation.Assembler.Mcp;
+using Elastic.Documentation.Configuration;
using Elastic.Documentation.LinkIndex;
using Elastic.Documentation.Links.InboundLinks;
using Elastic.Documentation.Mcp.Remote;
@@ -141,17 +142,22 @@ static void LogElasticsearchConfiguration(WebApplication app, ILogger logger)
{
try
{
- var esOptions = app.Services.GetService();
- if (esOptions != null)
+ var endpoints = app.Services.GetService();
+ if (endpoints is not null)
{
+ var endpoint = endpoints.Elasticsearch;
+ var searchIndex = DocumentationMappingContext.DocumentationDocumentSemantic
+ .CreateContext(type: "assembler")
+ .ResolveReadTarget();
logger.LogInformation(
- "Elasticsearch configuration - Url: {Url}, Index: {Index}",
- esOptions.Url,
- esOptions.IndexName
+ "Elasticsearch configuration - Url: {Url}, Namespace: {Namespace}, SearchIndex: {SearchIndex}",
+ endpoint.Uri,
+ endpoints.Namespace,
+ searchIndex
);
}
else
- logger.LogWarning("ElasticsearchOptions could not be resolved from DI");
+ logger.LogWarning("DocumentationEndpoints could not be resolved from DI");
}
catch (Exception ex)
{
diff --git a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json
index 15cac94ee..34f00ef13 100644
--- a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json
+++ b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.development.json
@@ -4,6 +4,5 @@
"Default": "Debug",
"Microsoft.AspNetCore": "Information"
}
- },
- "DOCUMENTATION_ELASTIC_INDEX": "semantic-docs-dev-latest"
+ }
}
diff --git a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json
index fe7d17f7b..0c208ae91 100644
--- a/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json
+++ b/src/api/Elastic.Documentation.Mcp.Remote/appsettings.edge.json
@@ -4,6 +4,5 @@
"Default": "Information",
"Microsoft.AspNetCore": "Warning"
}
- },
- "DOCUMENTATION_ELASTIC_INDEX": "semantic-docs-edge-latest"
+ }
}
diff --git a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs
index bd0a6e760..941aa47c8 100644
--- a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs
+++ b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs
@@ -109,7 +109,7 @@ Cancel ctx
var builder = new AssemblerBuilder(logFactory, assembleContext, navigation, htmlWriter, pathProvider, historyMapper);
- await builder.BuildAllAsync(assembleContext.Environment, assembleSources.AssembleSets, exporters, ctx);
+ await builder.BuildAllAsync(assembleSources.AssembleSets, exporters, ctx);
if (exporters.Contains(Exporter.LinkMetadata))
await cloner.WriteLinkRegistrySnapshot(checkoutResult.LinkRegistrySnapshot, ctx);
diff --git a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs
index e60954294..74b037059 100644
--- a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs
+++ b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs
@@ -38,7 +38,7 @@ public class AssemblerBuilder(
private ILegacyUrlMapper? LegacyUrlMapper { get; } = legacyUrlMapper;
- public async Task BuildAllAsync(PublishEnvironment environment, FrozenDictionary assembleSets, IReadOnlySet exportOptions, Cancel ctx)
+ public async Task BuildAllAsync(FrozenDictionary assembleSets, IReadOnlySet exportOptions, Cancel ctx)
{
if (context.OutputDirectory.Exists)
context.OutputDirectory.Delete(true);
@@ -48,7 +48,7 @@ public async Task BuildAllAsync(PublishEnvironment environment, FrozenDictionary
var buildTimes = new List<(string Name, int FileCount, TimeSpan Duration)>();
// Create exporters without inferrer - inferrer is created per-repository
- var markdownExporters = exportOptions.CreateMarkdownExporters(logFactory, context, environment.Name);
+ var markdownExporters = exportOptions.CreateMarkdownExporters(logFactory, context, "assembler");
var tasks = markdownExporters.Select(async e => await e.StartAsync(ctx));
await Task.WhenAll(tasks);
diff --git a/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs b/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs
index 1e44b5c93..323129a40 100644
--- a/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs
+++ b/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs
@@ -32,13 +32,11 @@ ICoreService githubActionsService
/// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY
/// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME
/// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD
- /// Index without semantic fields
/// Enable AI enrichment of documents using LLM-generated metadata
/// The number of search threads the inference endpoint should use. Defaults: 8
/// The number of index threads the inference endpoint should use. Defaults: 8
/// Do not use the Elastic Inference Service, bootstrap inference endpoint
/// Timeout in minutes for the inference endpoint creation. Defaults: 4
- /// The prefix for the computed index/alias names. Defaults: semantic-docs
/// Force reindex strategy to semantic index
/// The number of documents to send to ES as part of the bulk. Defaults: 100
/// The number of times failed bulk items should be retried. Defaults: 3
@@ -60,14 +58,12 @@ public async Task Index(IDiagnosticsCollector collector,
string? username = null,
string? password = null,
// inference options
- bool? noSemantic = null,
bool? enableAiEnrichment = null,
int? searchNumThreads = null,
int? indexNumThreads = null,
bool? noEis = null,
int? bootstrapTimeout = null,
// index options
- string? indexNamePrefix = null,
bool? forceReindex = null,
// channel buffer options
int? bufferSize = null,
@@ -91,13 +87,11 @@ public async Task Index(IDiagnosticsCollector collector,
ApiKey = apiKey,
Username = username,
Password = password,
- NoSemantic = noSemantic,
EnableAiEnrichment = enableAiEnrichment,
SearchNumThreads = searchNumThreads,
IndexNumThreads = indexNumThreads,
NoEis = noEis,
BootstrapTimeout = bootstrapTimeout,
- IndexNamePrefix = indexNamePrefix,
ForceReindex = forceReindex,
BufferSize = bufferSize,
MaxRetries = maxRetries,
diff --git a/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs b/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs
index c4fcc6c0f..19e060b1b 100644
--- a/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs
+++ b/src/services/Elastic.Documentation.Isolated/IsolatedIndexService.cs
@@ -29,13 +29,11 @@ ICoreService githubActionsService
/// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY
/// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME
/// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD
- /// Index without semantic fields
/// Enable AI enrichment of documents using LLM-generated metadata
/// The number of search threads the inference endpoint should use. Defaults: 8
/// The number of index threads the inference endpoint should use. Defaults: 8
/// Do not use the Elastic Inference Service, bootstrap inference endpoint
/// Timeout in minutes for the inference endpoint creation. Defaults: 4
- /// The prefix for the computed index/alias names. Defaults: semantic-docs
/// Force reindex strategy to semantic index
/// The number of documents to send to ES as part of the bulk. Defaults: 100
/// The number of times failed bulk items should be retried. Defaults: 3
@@ -57,14 +55,12 @@ public async Task Index(IDiagnosticsCollector collector,
string? username = null,
string? password = null,
// inference options
- bool? noSemantic = null,
bool? enableAiEnrichment = null,
int? searchNumThreads = null,
int? indexNumThreads = null,
bool? noEis = null,
int? bootstrapTimeout = null,
// index options
- string? indexNamePrefix = null,
bool? forceReindex = null,
// channel buffer options
int? bufferSize = null,
@@ -88,13 +84,11 @@ public async Task Index(IDiagnosticsCollector collector,
ApiKey = apiKey,
Username = username,
Password = password,
- NoSemantic = noSemantic,
EnableAiEnrichment = enableAiEnrichment,
SearchNumThreads = searchNumThreads,
IndexNumThreads = indexNumThreads,
NoEis = noEis,
BootstrapTimeout = bootstrapTimeout,
- IndexNamePrefix = indexNamePrefix,
ForceReindex = forceReindex,
BufferSize = bufferSize,
MaxRetries = maxRetries,
@@ -114,6 +108,7 @@ public async Task Index(IDiagnosticsCollector collector,
return await Build(collector, fileSystem,
metadataOnly: true, strict: false, path: path, output: null, pathPrefix: null,
force: true, allowIndexing: null, exporters: exporters, canonicalBaseUrl: null,
+ skipOpenApi: true,
ctx: ctx);
}
}
diff --git a/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs b/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs
index b49b02250..7c26b7843 100644
--- a/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs
+++ b/src/services/Elastic.Documentation.Search/Common/ElasticsearchClientAccessor.cs
@@ -4,7 +4,9 @@
using Elastic.Clients.Elasticsearch;
using Elastic.Clients.Elasticsearch.Serialization;
+using Elastic.Documentation.Configuration;
using Elastic.Documentation.Configuration.Search;
+using Elastic.Documentation.Search;
using Elastic.Transport;
namespace Elastic.Documentation.Search.Common;
@@ -18,52 +20,49 @@ public class ElasticsearchClientAccessor : IDisposable
private readonly ElasticsearchClientSettings _clientSettings;
private readonly SingleNodePool _nodePool;
public ElasticsearchClient Client { get; }
- public ElasticsearchOptions Options { get; }
+ public ElasticsearchEndpoint Endpoint { get; }
public SearchConfiguration SearchConfiguration { get; }
+ public string SearchIndex { get; }
public string? RulesetName { get; }
public IReadOnlyDictionary SynonymBiDirectional { get; }
public IReadOnlyCollection DiminishTerms { get; }
public ElasticsearchClientAccessor(
- ElasticsearchOptions elasticsearchOptions,
- SearchConfiguration searchConfiguration)
+ DocumentationEndpoints endpoints,
+ SearchConfiguration searchConfiguration
+ )
{
- Options = elasticsearchOptions;
+ var endpoint = endpoints.Elasticsearch;
+ Endpoint = endpoint;
SearchConfiguration = searchConfiguration;
SynonymBiDirectional = searchConfiguration.SynonymBiDirectional;
DiminishTerms = searchConfiguration.DiminishTerms;
+
+ SearchIndex = DocumentationMappingContext.DocumentationDocumentSemantic
+ .CreateContext(type: "assembler")
+ .ResolveReadTarget();
+
RulesetName = searchConfiguration.Rules.Count > 0
- ? ExtractRulesetName(elasticsearchOptions.IndexName)
+ ? "docs-ruleset-assembler"
: null;
- _nodePool = new SingleNodePool(new Uri(elasticsearchOptions.Url.Trim()));
+ _nodePool = new SingleNodePool(endpoint.Uri);
+ var auth = endpoint.ApiKey is { } apiKey
+ ? (AuthorizationHeader)new ApiKey(apiKey)
+ : endpoint is { Username: { } username, Password: { } password }
+ ? new BasicAuthentication(username, password)
+ : null!;
+
_clientSettings = new ElasticsearchClientSettings(
_nodePool,
sourceSerializer: (_, settings) => new DefaultSourceSerializer(settings, EsJsonContext.Default)
)
- .DefaultIndex(elasticsearchOptions.IndexName)
- .Authentication(new ApiKey(elasticsearchOptions.ApiKey));
+ .DefaultIndex(SearchIndex)
+ .Authentication(auth);
Client = new ElasticsearchClient(_clientSettings);
}
- ///
- /// Extracts the ruleset name from the index name.
- /// Index name format: "semantic-docs-{namespace}-latest" -> ruleset: "docs-ruleset-{namespace}"
- /// The namespace may contain hyphens (e.g., "codex-engineering"), so we extract everything
- /// between the "semantic-docs-" prefix and the "-latest" suffix.
- ///
- private static string? ExtractRulesetName(string indexName)
- {
- const string prefix = "semantic-docs-";
- const string suffix = "-latest";
- if (!indexName.StartsWith(prefix, StringComparison.Ordinal) || !indexName.EndsWith(suffix, StringComparison.Ordinal))
- return null;
-
- var ns = indexName[prefix.Length..^suffix.Length];
- return string.IsNullOrEmpty(ns) ? null : $"docs-ruleset-{ns}";
- }
-
///
/// Tests connectivity to the Elasticsearch cluster.
///
diff --git a/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj b/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj
index 27eb575bc..8a350648d 100644
--- a/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj
+++ b/src/services/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj
@@ -17,7 +17,6 @@
-
diff --git a/src/services/Elastic.Documentation.Search/ElasticsearchOptions.cs b/src/services/Elastic.Documentation.Search/ElasticsearchOptions.cs
deleted file mode 100644
index 9327ae816..000000000
--- a/src/services/Elastic.Documentation.Search/ElasticsearchOptions.cs
+++ /dev/null
@@ -1,39 +0,0 @@
-// Licensed to Elasticsearch B.V under one or more agreements.
-// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
-// See the LICENSE file in the project root for more information
-
-using Microsoft.Extensions.Configuration;
-
-namespace Elastic.Documentation.Search;
-
-public class ElasticsearchOptions
-{
- public ElasticsearchOptions(IConfiguration configuration)
- {
- // Build a new ConfigurationBuilder to read user secrets
- var configBuilder = new ConfigurationBuilder();
- _ = configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3");
- var userSecretsConfig = configBuilder.Build();
- var elasticUrlFromSecret = userSecretsConfig["Parameters:DocumentationElasticUrl"];
- var elasticApiKeyFromSecret = userSecretsConfig["Parameters:DocumentationElasticApiKey"];
-
- Url = GetEnv("DOCUMENTATION_ELASTIC_URL", elasticUrlFromSecret);
- ApiKey = GetEnv("DOCUMENTATION_ELASTIC_APIKEY", elasticApiKeyFromSecret);
- IndexName = configuration["DOCUMENTATION_ELASTIC_INDEX"] ?? "semantic-docs-dev-latest";
- }
-
- private static string GetEnv(string name, string? defaultValue = null)
- {
- var value = Environment.GetEnvironmentVariable(name);
- if (!string.IsNullOrEmpty(value))
- return value;
- if (defaultValue != null)
- return defaultValue;
- throw new ArgumentException($"Environment variable '{name}' not found.");
- }
-
- // Read from environment variables (set by Terraform from SSM at deploy time)
- public string Url { get; }
- public string ApiKey { get; }
- public string IndexName { get; }
-}
diff --git a/src/services/Elastic.Documentation.Search/FullSearchGateway.cs b/src/services/Elastic.Documentation.Search/FullSearchGateway.cs
index e0ad1825f..00cdc092a 100644
--- a/src/services/Elastic.Documentation.Search/FullSearchGateway.cs
+++ b/src/services/Elastic.Documentation.Search/FullSearchGateway.cs
@@ -102,7 +102,7 @@ private async Task SearchWithHybridRrf(FullSearchRequest reque
var response = await clientAccessor.Client.SearchAsync(s =>
{
_ = s
- .Indices(clientAccessor.Options.IndexName)
+ .Indices(clientAccessor.SearchIndex)
.From(Math.Max(request.PageNumber - 1, 0) * request.PageSize)
.Size(request.PageSize)
.Query(filteredQuery)
@@ -170,7 +170,7 @@ private async Task SearchLexicalOnly(FullSearchRequest request
var response = await clientAccessor.Client.SearchAsync(s =>
{
_ = s
- .Indices(clientAccessor.Options.IndexName)
+ .Indices(clientAccessor.SearchIndex)
.From(Math.Max(request.PageNumber - 1, 0) * request.PageSize)
.Size(request.PageSize)
.Query(filteredQuery)
diff --git a/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs b/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs
index a8c3710ae..dcb20c3e5 100644
--- a/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs
+++ b/src/services/Elastic.Documentation.Search/NavigationSearchGateway.cs
@@ -46,7 +46,7 @@ public async Task SearchImplementation(string query, int
var response = await clientAccessor.Client.SearchAsync(s =>
{
_ = s
- .Indices(clientAccessor.Options.IndexName)
+ .Indices(clientAccessor.SearchIndex)
.From(Math.Max(pageNumber - 1, 0) * pageSize)
.Size(pageSize)
.Query(lexicalQuery)
@@ -167,7 +167,7 @@ public async Task ExplainDocumentAsync(string query, string docum
{
// First, find the document by URL
var getDocResponse = await clientAccessor.Client.SearchAsync(s => s
- .Indices(clientAccessor.Options.IndexName)
+ .Indices(clientAccessor.SearchIndex)
.Query(q => q.Term(t => t.Field(f => f.Url).Value(documentUrl)))
.Size(1), ctx);
@@ -186,7 +186,7 @@ public async Task ExplainDocumentAsync(string query, string docum
// Now explain why this document matches (or doesn't match) the query
var explainResponse = await clientAccessor.Client.ExplainAsync(
- clientAccessor.Options.IndexName, documentId, e => e.Query(combinedQuery), ctx);
+ clientAccessor.SearchIndex, documentId, e => e.Query(combinedQuery), ctx);
if (!explainResponse.IsValidResponse)
{
diff --git a/src/services/Elastic.Documentation.Search/ServicesExtension.cs b/src/services/Elastic.Documentation.Search/ServicesExtension.cs
index 9b505c6f2..99e6619ae 100644
--- a/src/services/Elastic.Documentation.Search/ServicesExtension.cs
+++ b/src/services/Elastic.Documentation.Search/ServicesExtension.cs
@@ -25,8 +25,6 @@ public static IServiceCollection AddSearchServices(this IServiceCollection servi
var logger = GetLogger(services);
logger?.LogInformation("Configuring Search services");
- // Shared Elasticsearch options - DI auto-resolves IConfiguration from primary constructor
- _ = services.AddSingleton();
_ = services.AddSingleton();
// Navigation Search (autocomplete/navigation search)
diff --git a/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs b/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs
index 115dda5b9..df29d5666 100644
--- a/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs
+++ b/src/tooling/docs-builder/Commands/Assembler/AssemblerIndexCommand.cs
@@ -30,12 +30,10 @@ ICoreService githubActionsService
/// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY
/// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME
/// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD
- /// Index without semantic fields
/// Enable AI enrichment of documents using LLM-generated metadata
/// The number of search threads the inference endpoint should use. Defaults: 8
/// The number of index threads the inference endpoint should use. Defaults: 8
/// Do not use the Elastic Inference Service, bootstrap inference endpoint
- /// The prefix for the computed index/alias names. Defaults: semantic-docs
/// Force reindex strategy to semantic index
/// Timeout in minutes for the inference endpoint creation. Defaults: 4
/// The number of documents to send to ES as part of the bulk. Defaults: 100
@@ -59,7 +57,6 @@ public async Task Index(
string? password = null,
// inference options
- bool? noSemantic = null,
bool? enableAiEnrichment = null,
int? searchNumThreads = null,
int? indexNumThreads = null,
@@ -67,7 +64,6 @@ public async Task Index(
int? bootstrapTimeout = null,
// index options
- string? indexNamePrefix = null,
bool? forceReindex = null,
// channel buffer options
@@ -97,9 +93,9 @@ public async Task Index(
// endpoint options
endpoint, environment, apiKey, username, password,
// inference options
- noSemantic, enableAiEnrichment, indexNumThreads, searchNumThreads, noEis, bootstrapTimeout,
+ enableAiEnrichment, indexNumThreads, searchNumThreads, noEis, bootstrapTimeout,
// channel and connection options
- indexNamePrefix, forceReindex, bufferSize, maxRetries, debugMode,
+ forceReindex, bufferSize, maxRetries, debugMode,
// proxy options
proxyAddress, proxyPassword, proxyUsername,
// certificate options
@@ -110,9 +106,9 @@ static async (s, collector, state, ctx) => await s.Index(collector, state.fs,
// endpoint options
state.endpoint, state.environment, state.apiKey, state.username, state.password,
// inference options
- state.noSemantic, state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout,
+ state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout,
// channel and connection options
- state.indexNamePrefix, state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode,
+ state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode,
// proxy options
state.proxyAddress, state.proxyPassword, state.proxyUsername,
// certificate options
diff --git a/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs b/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs
index 6b70e50de..ae11fbb96 100644
--- a/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs
+++ b/src/tooling/docs-builder/Commands/Codex/CodexIndexCommand.cs
@@ -36,12 +36,10 @@ ICoreService githubActionsService
/// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY
/// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME
/// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD
- /// Index without semantic fields
/// Enable AI enrichment of documents using LLM-generated metadata
/// The number of search threads the inference endpoint should use. Defaults: 8
/// The number of index threads the inference endpoint should use. Defaults: 8
/// Do not use the Elastic Inference Service, bootstrap inference endpoint
- /// The prefix for the computed index/alias names. Defaults: semantic-docs
/// Force reindex strategy to semantic index
/// Timeout in minutes for the inference endpoint creation. Defaults: 4
/// The number of documents to send to ES as part of the bulk. Defaults: 100
@@ -65,7 +63,6 @@ public async Task Index(
string? password = null,
// inference options
- bool? noSemantic = null,
bool? enableAiEnrichment = null,
int? searchNumThreads = null,
int? indexNumThreads = null,
@@ -73,7 +70,6 @@ public async Task Index(
int? bootstrapTimeout = null,
// index options
- string? indexNamePrefix = null,
bool? forceReindex = null,
// channel buffer options
@@ -134,13 +130,11 @@ public async Task Index(
ApiKey = apiKey,
Username = username,
Password = password,
- NoSemantic = noSemantic,
EnableAiEnrichment = enableAiEnrichment,
SearchNumThreads = searchNumThreads,
IndexNumThreads = indexNumThreads,
NoEis = noEis,
BootstrapTimeout = bootstrapTimeout,
- IndexNamePrefix = indexNamePrefix,
ForceReindex = forceReindex,
BufferSize = bufferSize,
MaxRetries = maxRetries,
diff --git a/src/tooling/docs-builder/Commands/IndexCommand.cs b/src/tooling/docs-builder/Commands/IndexCommand.cs
index efc1af596..ff402ce16 100644
--- a/src/tooling/docs-builder/Commands/IndexCommand.cs
+++ b/src/tooling/docs-builder/Commands/IndexCommand.cs
@@ -28,11 +28,9 @@ ICoreService githubActionsService
/// Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY
/// Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME
/// Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD
- /// Index without semantic fields
/// Enable AI enrichment of documents using LLM-generated metadata
/// The number of search threads the inference endpoint should use. Defaults: 8
/// The number of index threads the inference endpoint should use. Defaults: 8
- /// The prefix for the computed index/alias names. Defaults: semantic-docs
/// Do not use the Elastic Inference Service, bootstrap inference endpoint
/// Force reindex strategy to semantic index
/// Timeout in minutes for the inference endpoint creation. Defaults: 4
@@ -57,7 +55,6 @@ public async Task Index(
string? password = null,
// inference options
- bool? noSemantic = null,
bool? enableAiEnrichment = null,
int? searchNumThreads = null,
int? indexNumThreads = null,
@@ -65,7 +62,6 @@ public async Task Index(
int? bootstrapTimeout = null,
// index options
- string? indexNamePrefix = null,
bool? forceReindex = null,
// channel buffer options
@@ -95,9 +91,9 @@ public async Task Index(
// endpoint options
endpoint, apiKey, username, password,
// inference options
- noSemantic, enableAiEnrichment, indexNumThreads, noEis, searchNumThreads, bootstrapTimeout,
+ enableAiEnrichment, indexNumThreads, noEis, searchNumThreads, bootstrapTimeout,
// channel and connection options
- indexNamePrefix, forceReindex, bufferSize, maxRetries, debugMode,
+ forceReindex, bufferSize, maxRetries, debugMode,
// proxy options
proxyAddress, proxyPassword, proxyUsername,
// certificate options
@@ -108,9 +104,9 @@ static async (s, collector, state, ctx) => await s.Index(collector, state.fs, st
// endpoint options
state.endpoint, state.apiKey, state.username, state.password,
// inference options
- state.noSemantic, state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout,
+ state.enableAiEnrichment, state.searchNumThreads, state.indexNumThreads, state.noEis, state.bootstrapTimeout,
// channel and connection options
- state.indexNamePrefix, state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode,
+ state.forceReindex, state.bufferSize, state.maxRetries, state.debugMode,
// proxy options
state.proxyAddress, state.proxyPassword, state.proxyUsername,
// certificate options
diff --git a/src/tooling/docs-builder/DocumentationTooling.cs b/src/tooling/docs-builder/DocumentationTooling.cs
index 298d82cd2..f8b3a952c 100644
--- a/src/tooling/docs-builder/DocumentationTooling.cs
+++ b/src/tooling/docs-builder/DocumentationTooling.cs
@@ -14,6 +14,7 @@
using Elastic.Documentation.Configuration.Search;
using Elastic.Documentation.Configuration.Versions;
using Elastic.Documentation.Diagnostics;
+using Elastic.Documentation.ServiceDefaults;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
@@ -42,33 +43,10 @@ public static TBuilder AddDocumentationToolingDefaults(this TBuilder b
return new DiagnosticsCollector([]);
return new ConsoleDiagnosticsCollector(logFactory, githubActionsService);
})
- .AddSingleton(sp =>
+ .AddSingleton(_ =>
{
- var resolver = sp.GetRequiredService();
- var elasticsearchUri = ResolveServiceEndpoint(resolver,
- () => TryEnvVars("http://localhost:9200", "DOCUMENTATION_ELASTIC_URL", "CONNECTIONSTRINGS__ELASTICSEARCH")
- );
- var elasticsearchPassword =
- elasticsearchUri.UserInfo is { } userInfo && userInfo.Contains(':')
- ? userInfo.Split(':')[1]
- : TryEnvVarsOptional("DOCUMENTATION_ELASTIC_PASSWORD");
-
- var elasticsearchUser =
- elasticsearchUri.UserInfo is { } userInfo2 && userInfo2.Contains(':')
- ? userInfo2.Split(':')[0]
- : TryEnvVars("elastic", "DOCUMENTATION_ELASTIC_USERNAME");
-
- var elasticsearchApiKey = TryEnvVarsOptional("DOCUMENTATION_ELASTIC_APIKEY");
- return new DocumentationEndpoints
- {
- Elasticsearch = new ElasticsearchEndpoint
- {
- Uri = elasticsearchUri,
- Password = elasticsearchPassword,
- ApiKey = elasticsearchApiKey,
- Username = elasticsearchUser
- },
- };
+ var endpoints = ElasticsearchEndpointFactory.Create(builder.Configuration);
+ return endpoints;
})
.AddSingleton(sp =>
{
diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs
index 18b096b6a..cb9411ec5 100644
--- a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs
+++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs
@@ -7,12 +7,15 @@
using Documentation.Builder.Diagnostics.Console;
using Elastic.Documentation.Aspire;
using Elastic.Documentation.Configuration;
+using Elastic.Documentation.Search;
+using Elastic.Documentation.ServiceDefaults;
using Elastic.Ingest.Elasticsearch;
+using Elastic.Ingest.Elasticsearch.Indices;
+using Elastic.Mapping;
using Elastic.Markdown.Exporters.Elasticsearch;
using Elastic.Transport;
using Elastic.Transport.Products.Elasticsearch;
using FluentAssertions;
-using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
@@ -136,61 +139,33 @@ private async ValueTask IsIndexingNeeded()
{
try
{
- // Get Elasticsearch configuration from Aspire
- var (elasticsearchUrl, apiKey, password, username) = GetElasticsearchConfiguration();
+ var endpoints = ElasticsearchEndpointFactory.Create();
- if (string.IsNullOrEmpty(elasticsearchUrl))
- {
- Console.WriteLine("No Elasticsearch URL configured, indexing will be performed.");
- Connected = false;
- return false;
- }
-
- Console.WriteLine($"Checking remote Elasticsearch at {elasticsearchUrl} for existing data...");
-
- // Create Elasticsearch endpoint configuration
- var endpoint = new ElasticsearchEndpoint
- {
- Uri = new Uri(elasticsearchUrl),
- ApiKey = apiKey,
- Username = username,
- Password = password
- };
-
- // Create transport configuration (similar to ElasticsearchMarkdownExporter)
- var configuration = new ElasticsearchConfiguration(endpoint.Uri)
- {
- Authentication = endpoint.ApiKey is { } eApiKey
- ? new ApiKey(eApiKey)
- : endpoint is { Username: { } eUsername, Password: { } ePassword }
- ? new BasicAuthentication(eUsername, ePassword)
- : null,
- EnableHttpCompression = true
- };
+ var endpoint = endpoints.Elasticsearch;
+ Console.WriteLine($"Checking remote Elasticsearch at {endpoint.Uri} for existing data...");
- var transport = new DistributedTransport(configuration);
+ var transport = ElasticsearchTransportFactory.Create(endpoint);
Connected = (await transport.HeadAsync("/", TestContext.Current.CancellationToken)).ApiCallDetails.HasSuccessfulStatusCode;
// Create a logger factory and diagnostics collector
var loggerFactory = fixture.DistributedApplication.Services.GetRequiredService();
var collector = new ConsoleDiagnosticsCollector(loggerFactory);
- // Create semantic exporter to check channel hash (index namespace is 'dev' for tests)
- using var semanticExporter = new ElasticsearchSemanticIngestChannel(
- loggerFactory,
- collector,
- endpoint,
- "dev", // index namespace
- transport,
- []
- );
+ // Create semantic type context to check channel hash (index namespace is 'dev' for tests)
+ var semanticTypeContext = DocumentationMappingContext.DocumentationDocumentSemantic.CreateContext(type: "assembler") with
+ {
+ ConfigureAnalysis = a => DocumentationAnalysisFactory.BuildAnalysis(a, "docs-assembler", [])
+ };
+
+ var options = new IngestChannelOptions(transport, semanticTypeContext);
+ using var channel = new IngestChannel(options);
// Get the current hash from Elasticsearch index template
- var currentSemanticHash = await semanticExporter.Channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? string.Empty;
+ var currentSemanticHash = await channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? string.Empty;
- // Get the expected channel hash from the semantic exporter
- await semanticExporter.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, ctx: TestContext.Current.CancellationToken);
- var expectedSemanticHash = semanticExporter.Channel.ChannelHash;
+ // Get the expected channel hash
+ _ = await channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, TestContext.Current.CancellationToken);
+ var expectedSemanticHash = channel.ChannelHash;
Console.WriteLine($"Elasticsearch semantic hash: '{currentSemanticHash}'");
Console.WriteLine($"Expected semantic hash: '{expectedSemanticHash}'");
@@ -232,41 +207,6 @@ private async ValueTask ValidateResourceExitCode(string resourceName)
Console.WriteLine($"{resourceName} completed with exit code 0");
}
- ///
- /// Gets Elasticsearch configuration from Aspire parameters and environment.
- /// Manually reads user secrets from the aspire project, then falls back to environment variables.
- ///
- private (string? Url, string? ApiKey, string? Password, string? Username) GetElasticsearchConfiguration()
- {
- // Manually read user secrets from the aspire project
- // UserSecretsId from aspire.csproj: 72f50f33-6fb9-4d08-bff3-39568fe370b3
- var configBuilder = new ConfigurationBuilder();
- configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3");
- var userSecretsConfig = configBuilder.Build();
-
- // Get URL - try user secrets first, then Aspire configuration, then environment
- var url = userSecretsConfig["Parameters:DocumentationElasticUrl"]
- ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticUrl"]
- ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL");
-
- // Get API Key - try user secrets first, then Aspire configuration, then environment
- var apiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"]
- ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticApiKey"]
- ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY");
-
- // Get password for local Elasticsearch (when using --start-elasticsearch)
- var password = userSecretsConfig["Parameters:DocumentationElasticPassword"] ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_PASSWORD");
-
- // Get username (defaults to "elastic")
- var username = userSecretsConfig["Parameters:DocumentationElasticUsername"]
- ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_USERNAME")
- ?? "elastic";
-
- Console.WriteLine($"Elasticsearch configuration retrieved: URL={url != null}, ApiKey={apiKey != null}, Password={password != null}");
-
- return (url, apiKey, password, username);
- }
-
public ValueTask DisposeAsync()
{
HttpClient?.Dispose();
diff --git a/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj b/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj
index 2896294b0..8889bbcba 100644
--- a/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj
+++ b/tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj
@@ -10,6 +10,7 @@
+
diff --git a/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs b/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs
index a6ec7c7dd..67f8570a6 100644
--- a/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs
+++ b/tests-integration/Mcp.Remote.IntegrationTests/McpToolsIntegrationTestsBase.cs
@@ -10,7 +10,7 @@
using Elastic.Documentation.Mcp.Remote.Tools;
using Elastic.Documentation.Search;
using Elastic.Documentation.Search.Common;
-using Microsoft.Extensions.Configuration;
+using Elastic.Documentation.ServiceDefaults;
using Microsoft.Extensions.Logging.Abstractions;
namespace Mcp.Remote.IntegrationTests;
@@ -25,17 +25,16 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output)
///
/// Creates SearchTools with all required dependencies.
///
- protected (SearchTools? Tools, ElasticsearchClientAccessor? ClientAccessor) CreateSearchTools()
+ protected (SearchTools Tools, ElasticsearchClientAccessor? ClientAccessor) CreateSearchTools()
{
var clientAccessor = CreateElasticsearchClientAccessor();
- if (clientAccessor == null)
- return (null, null);
var productsConfig = CreateProductsConfiguration();
var fullSearchGateway = new FullSearchGateway(
clientAccessor,
productsConfig,
- NullLogger.Instance);
+ NullLogger.Instance
+ );
var searchTools = new SearchTools(fullSearchGateway, NullLogger.Instance);
return (searchTools, clientAccessor);
@@ -44,16 +43,11 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output)
///
/// Creates DocumentTools with all required dependencies.
///
- protected (DocumentTools? Tools, ElasticsearchClientAccessor? ClientAccessor) CreateDocumentTools()
+ protected (DocumentTools Tools, ElasticsearchClientAccessor? ClientAccessor) CreateDocumentTools()
{
var clientAccessor = CreateElasticsearchClientAccessor();
- if (clientAccessor == null)
- return (null, null);
-
- var documentGateway = new DocumentGateway(
- clientAccessor,
- NullLogger.Instance);
+ var documentGateway = new DocumentGateway(clientAccessor, NullLogger.Instance);
var documentTools = new DocumentTools(documentGateway, NullLogger.Instance);
return (documentTools, clientAccessor);
}
@@ -61,18 +55,12 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output)
///
/// Creates CoherenceTools with all required dependencies.
///
- protected (CoherenceTools? Tools, ElasticsearchClientAccessor? ClientAccessor) CreateCoherenceTools()
+ protected (CoherenceTools Tools, ElasticsearchClientAccessor? ClientAccessor) CreateCoherenceTools()
{
var clientAccessor = CreateElasticsearchClientAccessor();
- if (clientAccessor == null)
- return (null, null);
var productsConfig = CreateProductsConfiguration();
- var fullSearchGateway = new FullSearchGateway(
- clientAccessor,
- productsConfig,
- NullLogger.Instance);
-
+ var fullSearchGateway = new FullSearchGateway(clientAccessor, productsConfig, NullLogger.Instance);
var coherenceTools = new CoherenceTools(fullSearchGateway, NullLogger.Instance);
return (coherenceTools, clientAccessor);
}
@@ -80,34 +68,10 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output)
///
/// Creates an ElasticsearchClientAccessor using configuration from user secrets and environment variables.
///
- private static ElasticsearchClientAccessor? CreateElasticsearchClientAccessor()
+ private static ElasticsearchClientAccessor CreateElasticsearchClientAccessor()
{
- var configBuilder = new ConfigurationBuilder();
- configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3");
- configBuilder.AddEnvironmentVariables();
- var config = configBuilder.Build();
-
- var elasticsearchUrl =
- config["Parameters:DocumentationElasticUrl"]
- ?? config["DOCUMENTATION_ELASTIC_URL"];
-
- var elasticsearchApiKey =
- config["Parameters:DocumentationElasticApiKey"]
- ?? config["DOCUMENTATION_ELASTIC_APIKEY"];
-
- if (string.IsNullOrEmpty(elasticsearchUrl) || string.IsNullOrEmpty(elasticsearchApiKey))
- return null;
-
- var testConfig = new ConfigurationBuilder()
- .AddInMemoryCollection(new Dictionary
- {
- ["DOCUMENTATION_ELASTIC_URL"] = elasticsearchUrl,
- ["DOCUMENTATION_ELASTIC_APIKEY"] = elasticsearchApiKey,
- ["DOCUMENTATION_ELASTIC_INDEX"] = "semantic-docs-dev-latest"
- })
- .Build();
-
- var options = new ElasticsearchOptions(testConfig);
+ var endpoints = ElasticsearchEndpointFactory.Create();
+
var searchConfig = new SearchConfiguration
{
Synonyms = new Dictionary(),
@@ -115,7 +79,7 @@ public abstract class McpToolsIntegrationTestsBase(ITestOutputHelper output)
DiminishTerms = ["plugin", "client", "integration", "glossary"]
};
- return new ElasticsearchClientAccessor(options, searchConfig);
+ return new ElasticsearchClientAccessor(endpoints, searchConfig);
}
///
diff --git a/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj b/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj
index 0754b8a91..4763c5eb0 100644
--- a/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj
+++ b/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj
@@ -10,6 +10,7 @@
+
diff --git a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs
index 3d93fd012..34246dc35 100644
--- a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs
+++ b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs
@@ -5,8 +5,8 @@
using Elastic.Documentation.Configuration.Search;
using Elastic.Documentation.Search;
using Elastic.Documentation.Search.Common;
+using Elastic.Documentation.ServiceDefaults;
using FluentAssertions;
-using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging.Abstractions;
namespace Search.IntegrationTests;
@@ -220,37 +220,10 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring()
///
/// Creates an ElasticsearchGateway instance using configuration from the distributed application.
///
- private NavigationSearchGateway? CreateFindPageGateway()
+ private NavigationSearchGateway CreateFindPageGateway()
{
- // Build a new ConfigurationBuilder to read user secrets and environment variables
- var configBuilder = new ConfigurationBuilder();
- configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3");
- configBuilder.AddEnvironmentVariables();
- var config = configBuilder.Build();
-
- // Get Elasticsearch configuration with fallback chain: user secrets → environment
- var elasticsearchUrl =
- config["Parameters:DocumentationElasticUrl"]
- ?? config["DOCUMENTATION_ELASTIC_URL"];
-
- var elasticsearchApiKey =
- config["Parameters:DocumentationElasticApiKey"]
- ?? config["DOCUMENTATION_ELASTIC_APIKEY"];
-
- if (elasticsearchUrl is null or "" || elasticsearchApiKey is null or "")
- return null;
-
- // Create IConfiguration with the required values for ElasticsearchOptions
- var testConfig = new ConfigurationBuilder()
- .AddInMemoryCollection(new Dictionary
- {
- ["DOCUMENTATION_ELASTIC_URL"] = elasticsearchUrl,
- ["DOCUMENTATION_ELASTIC_APIKEY"] = elasticsearchApiKey,
- ["DOCUMENTATION_ELASTIC_INDEX"] = "semantic-docs-dev-latest"
- })
- .Build();
+ var endpoints = ElasticsearchEndpointFactory.Create();
- var options = new ElasticsearchOptions(testConfig);
var searchConfig = new SearchConfiguration
{
Synonyms = new Dictionary(),
@@ -278,7 +251,7 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring()
DiminishTerms = ["plugin", "client", "integration", "glossary"]
};
- var clientAccessor = new ElasticsearchClientAccessor(options, searchConfig);
+ var clientAccessor = new ElasticsearchClientAccessor(endpoints, searchConfig);
return new NavigationSearchGateway(clientAccessor, NullLogger.Instance);
}
}