From 039e7e23c0469efb23726a36ed805e14834bf652 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Thu, 19 Feb 2026 08:25:40 -0800 Subject: [PATCH 01/20] initial commit for grammar API Signed-off-by: Eric Wei --- .../autocomplete/AutocompleteArtifact.java | 121 ++++++++ .../autocomplete/GrammarArtifactBuilder.java | 202 ++++++++++++++ .../org/opensearch/sql/plugin/SQLPlugin.java | 2 + .../sql/plugin/rest/RestPPLGrammarAction.java | 228 +++++++++++++++ .../plugin/rest/RestPPLGrammarActionTest.java | 260 ++++++++++++++++++ .../PPLAutocompleteArtifactBuilder.java | 122 ++++++++ 6 files changed, 935 insertions(+) create mode 100644 core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java create mode 100644 core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java create mode 100644 plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java create mode 100644 plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java create mode 100644 ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java new file mode 100644 index 00000000000..38b87765cbd --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java @@ -0,0 +1,121 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.executor.autocomplete; + +import java.util.Map; +import lombok.Builder; +import lombok.Data; + +/** + * Autocomplete artifact bundle containing everything needed for client-side grammar-based + * autocomplete. + * + *

This bundle is language-agnostic and can be used for PPL, SQL, or any ANTLR-based language. It + * contains: + * + *

+ * + *

Frontend uses this bundle to: + * + *

    + *
  1. Deserialize ATNs with antlr4ng + *
  2. Create LexerInterpreter and ParserInterpreter + *
  3. Use antlr4-c3 to find valid tokens at cursor + *
  4. Generate suggestions from catalogs + *
+ */ +@Data +@Builder +public class AutocompleteArtifact { + + // ============================================================================ + // Identity & versioning + // ============================================================================ + + /** Bundle version (increment when format changes) */ + private String bundleVersion; + + /** + * Hash of grammar sources + ANTLR version. Used for cache validation via ETag. Format: + * "sha256:abc123..." + */ + private String grammarHash; + + // ============================================================================ + // Lexer ATN & metadata + // ============================================================================ + + /** + * Serialized lexer ATN as int array. Frontend uses directly: new + * ATNDeserializer().deserialize(lexerSerializedATN) + */ + private int[] lexerSerializedATN; + + /** Lexer rule names (e.g., ["SEARCH", "WHERE", "PIPE", ...]) */ + private String[] lexerRuleNames; + + /** Channel names (e.g., ["DEFAULT_TOKEN_CHANNEL", "WHITESPACE", "ERRORCHANNEL"]) */ + private String[] channelNames; + + /** Mode names (e.g., ["DEFAULT_MODE"]) */ + private String[] modeNames; + + // ============================================================================ + // Parser ATN & metadata + // ============================================================================ + + /** + * Serialized parser ATN as int array. Frontend uses directly: new + * ATNDeserializer().deserialize(parserSerializedATN) + */ + private int[] parserSerializedATN; + + /** Parser rule names (e.g., ["root", "pplStatement", "commands", ...]) */ + private String[] parserRuleNames; + + /** Start rule index (usually 0 for "root" rule) */ + private int startRuleIndex; + + // ============================================================================ + // Vocabulary + // ============================================================================ + + /** + * Literal names from vocabulary. Index = token type. Values are literal tokens with quotes, or + * null. Example: ["", "'search'", "'where'", "'|'", null, null, ...] + */ + private String[] literalNames; + + /** + * Symbolic names from vocabulary. Index = token type. Values are token symbolic names, or null. + * Example: ["", "SEARCH", "WHERE", "PIPE", "ID", "INTEGER", ...] + */ + private String[] symbolicNames; + + /** + * Optional display names (user-friendly token names). If not provided, frontend uses literal or + * symbolic names. + */ + private String[] displayNames; + + // ============================================================================ + // Token classification + // ============================================================================ + + /** + * Mapping from token symbolic name to suggestion category. Used by frontend to classify antlr4-c3 + * token candidates into suggestion types. + * + *

Example: { "SEARCH": "COMMAND", "WHERE": "COMMAND", "BY": "KEYWORD", "COUNT": "FUNCTION", + * "AND": "OPERATOR" } + */ + private Map tokenTypeToCategory; +} diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java new file mode 100644 index 00000000000..5bbffea1e9e --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java @@ -0,0 +1,202 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.executor.autocomplete; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import lombok.extern.log4j.Log4j2; +import org.antlr.v4.runtime.Lexer; +import org.antlr.v4.runtime.Parser; +import org.antlr.v4.runtime.Vocabulary; + +/** + * Utility class for extracting ANTLR grammar artifacts (ATN, vocabulary, rule names) from generated + * parser/lexer classes. + * + *

This class handles the low-level details of: + * + *

    + *
  • Converting ANTLR's Java String ATN format to int[] for JSON transfer + *
  • Extracting vocabulary (literal and symbolic names) + *
  • Extracting rule names via public ANTLR APIs + *
  • Computing grammar hash for versioning + *
+ * + *

Language-specific builders (PPL, SQL) use this class to build their autocomplete bundles. + */ +@Log4j2 +public class GrammarArtifactBuilder { + + /** + * Extract literal names from vocabulary. + * + *

Returns array where index = token type, value = literal token (with quotes) or null. + * + *

Example: ["", "'search'", "'where'", "'|'", null, ...] + * + * @param vocabulary Parser vocabulary + * @return Array of literal names + */ + public static String[] extractLiteralNames(Vocabulary vocabulary) { + int maxTokenType = vocabulary.getMaxTokenType(); + String[] names = new String[maxTokenType + 1]; + + for (int i = 0; i <= maxTokenType; i++) { + String literal = vocabulary.getLiteralName(i); + // Keep nulls as nulls (no literal representation) + names[i] = literal; + } + + log.debug("Extracted {} literal names", names.length); + return names; + } + + /** + * Extract symbolic names from vocabulary. + * + *

Returns array where index = token type, value = symbolic token name or null. + * + *

Example: ["", "SEARCH", "WHERE", "PIPE", "ID", ...] + * + * @param vocabulary Parser vocabulary + * @return Array of symbolic names + */ + public static String[] extractSymbolicNames(Vocabulary vocabulary) { + int maxTokenType = vocabulary.getMaxTokenType(); + String[] names = new String[maxTokenType + 1]; + + for (int i = 0; i <= maxTokenType; i++) { + String symbolic = vocabulary.getSymbolicName(i); + // Keep nulls as nulls (no symbolic name) + names[i] = symbolic; + } + + log.debug("Extracted {} symbolic names", names.length); + return names; + } + + /** + * Extract rule names from parser. + * + *

Parser.getRuleNames() is public API. + * + * @param parser Parser instance + * @return Array of rule names + */ + public static String[] extractParserRuleNames(Parser parser) { + String[] ruleNames = parser.getRuleNames(); + log.debug("Extracted {} parser rule names", ruleNames.length); + return ruleNames; + } + + /** + * Extract rule names from lexer. + * + *

Lexer.getRuleNames() is public API (Lexer extends Recognizer). + * + * @param lexer Lexer instance + * @return Array of lexer rule names + */ + public static String[] extractLexerRuleNames(Lexer lexer) { + String[] ruleNames = lexer.getRuleNames(); + log.debug("Extracted {} lexer rule names", ruleNames.length); + return ruleNames; + } + + /** + * Extract channel names from lexer. + * + *

ANTLR 4.x exposes channel names via getChannelNames() method in generated lexers. + * This method dynamically extracts the actual channel names from the lexer instance. + * + * @param lexer Lexer instance + * @return Array of channel names + */ + public static String[] extractChannelNames(Lexer lexer) { + String[] channelNames = lexer.getChannelNames(); + log.debug("Extracted {} channel names from lexer", channelNames.length); + return channelNames; + } + + /** + * Extract mode names from lexer. + * + *

ANTLR 4.x exposes mode names via getModeNames() method in generated lexers. + * This method dynamically extracts the actual mode names from the lexer instance. + * + * @param lexer Lexer instance + * @return Array of mode names + */ + public static String[] extractModeNames(Lexer lexer) { + String[] modeNames = lexer.getModeNames(); + log.debug("Extracted {} mode names from lexer", modeNames.length); + return modeNames; + } + + /** + * Compute grammar hash from ATN data (recommended). + * + *

This method hashes the serialized ATN arrays directly, which: + * + *

    + *
  • Always available at runtime (no classpath dependencies) + *
  • Reflects the actual artifact being served + *
  • Changes when grammar changes (ATN structure changes) + *
+ * + * @param lexerATN Serialized lexer ATN as int array + * @param parserATN Serialized parser ATN as int array + * @param antlrVersion ANTLR tool version (e.g., "4.13.2") + * @return Hash string in format "sha256:abc123..." + */ + public static String computeGrammarHash(int[] lexerATN, int[] parserATN, String antlrVersion) { + try { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + + // Hash lexer ATN data + for (int value : lexerATN) { + digest.update((byte) (value >> 8)); + digest.update((byte) value); + } + + // Hash parser ATN data + for (int value : parserATN) { + digest.update((byte) (value >> 8)); + digest.update((byte) value); + } + + // Hash ANTLR version to detect generator changes + digest.update(antlrVersion.getBytes(StandardCharsets.UTF_8)); + + // Compute hash + byte[] hashBytes = digest.digest(); + String result = "sha256:" + bytesToHex(hashBytes); + + log.info("Computed grammar hash from ATN data: {}", result); + return result; + + } catch (NoSuchAlgorithmException e) { + // SHA-256 is required by Java specification, this should never happen + throw new IllegalStateException("SHA-256 algorithm not available", e); + } + } + + + /** + * Convert byte array to hex string. + * + * @param bytes Input bytes + * @return Hex string (lowercase) + */ + private static String bytesToHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(bytes.length * 2); + for (byte b : bytes) { + sb.append(String.format("%02x", b & 0xFF)); + } + return sb.toString(); + } +} diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java index d817e13c69f..edffd65f6bf 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java @@ -94,6 +94,7 @@ import org.opensearch.sql.opensearch.storage.OpenSearchDataSourceFactory; import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine; import org.opensearch.sql.plugin.config.OpenSearchPluginModule; +import org.opensearch.sql.plugin.rest.RestPPLGrammarAction; import org.opensearch.sql.plugin.rest.RestPPLQueryAction; import org.opensearch.sql.plugin.rest.RestPPLStatsAction; import org.opensearch.sql.plugin.rest.RestQuerySettingsAction; @@ -163,6 +164,7 @@ public List getRestHandlers( return Arrays.asList( new RestPPLQueryAction(), + new RestPPLGrammarAction(), new RestSqlAction(settings, injector), new RestSqlStatsAction(settings, restController), new RestPPLStatsAction(settings, restController), diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java new file mode 100644 index 00000000000..c8908cd7228 --- /dev/null +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -0,0 +1,228 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.plugin.rest; + +import static org.opensearch.rest.RestRequest.Method.GET; + +import com.google.common.collect.ImmutableList; +import java.io.IOException; +import java.util.List; +import lombok.extern.log4j.Log4j2; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.rest.BaseRestHandler; +import org.opensearch.rest.BytesRestResponse; +import org.opensearch.rest.RestRequest; +import org.opensearch.sql.executor.autocomplete.AutocompleteArtifact; +import org.opensearch.sql.ppl.autocomplete.PPLAutocompleteArtifactBuilder; +import org.opensearch.transport.client.node.NodeClient; + +/** + * REST handler for PPL grammar metadata endpoint. + * + *

Endpoint: GET /_plugins/_ppl/_grammar + * + *

Returns grammar data for client-side parsing, autocomplete, syntax highlighting, etc: + * + *

    + *
  • Serialized ANTLR ATN data (lexer + parser) + *
  • Vocabulary (literal and symbolic names) + *
  • Rule names (parser and lexer) + *
  • Channel and mode names + *
+ * + *

The bundle is cached in memory and versioned via ETag (grammar hash). Clients should cache + * locally and only refetch when ETag changes. + * + *

Example request: + * + *

+ * GET /_plugins/_ppl/_grammar
+ * If-None-Match: "sha256:abc123..."
+ * 
+ * + *

Response headers: + * + *

+ * HTTP/1.1 200 OK
+ * ETag: "sha256:abc123..."
+ * Cache-Control: public, max-age=3600
+ * Content-Type: application/json
+ * 
+ * + *

Or if client has latest: + * + *

+ * HTTP/1.1 304 Not Modified
+ * ETag: "sha256:abc123..."
+ * 
+ */ +@Log4j2 +public class RestPPLGrammarAction extends BaseRestHandler { + + private static final String ENDPOINT_PATH = "/_plugins/_ppl/_grammar"; + + // Lazy-initialized singleton artifact (built once per JVM lifecycle) + private volatile AutocompleteArtifact cachedArtifact; + private final Object artifactLock = new Object(); + + @Override + public String getName() { + return "ppl_grammar_action"; + } + + @Override + public List routes() { + return ImmutableList.of(new Route(GET, ENDPOINT_PATH)); + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) + throws IOException { + + log.debug("Received PPL grammar request"); + + return channel -> { + try { + // Get or build artifact (lazy initialization) + AutocompleteArtifact artifact = getOrBuildArtifact(); + + String grammarHash = artifact.getGrammarHash(); + String ifNoneMatch = request.header("If-None-Match"); + + // Strip quotes from ETag if present ("sha256:..." → sha256:...) + if (ifNoneMatch != null && ifNoneMatch.startsWith("\"") && ifNoneMatch.endsWith("\"")) { + ifNoneMatch = ifNoneMatch.substring(1, ifNoneMatch.length() - 1); + } + + // Return 304 Not Modified if client has latest version + if (grammarHash.equals(ifNoneMatch)) { + log.debug("Client has latest grammar (ETag match), returning 304"); + BytesRestResponse response = + new BytesRestResponse(RestStatus.NOT_MODIFIED, "application/json", ""); + response.addHeader("ETag", "\"" + grammarHash + "\""); + response.addHeader("Cache-Control", "public, max-age=3600"); + channel.sendResponse(response); + return; + } + + // Serialize artifact to JSON + log.debug("Serializing grammar to JSON (hash: {})", grammarHash); + XContentBuilder builder = channel.newBuilder(); + serializeArtifact(builder, artifact); + + BytesRestResponse response = new BytesRestResponse(RestStatus.OK, builder); + + // Add caching headers + response.addHeader("ETag", "\"" + grammarHash + "\""); + response.addHeader("Cache-Control", "public, max-age=3600"); + response.addHeader("Content-Type", "application/json; charset=UTF-8"); + + log.info("Returning PPL grammar (size: {} bytes)", response.content().length()); + channel.sendResponse(response); + + } catch (Exception e) { + log.error("Error building or serializing PPL grammar", e); + channel.sendResponse(new BytesRestResponse(channel, RestStatus.INTERNAL_SERVER_ERROR, e)); + } + }; + } + + /** + * Get cached artifact or build it if not initialized. + * + *

Thread-safe lazy initialization with double-checked locking. + */ + private AutocompleteArtifact getOrBuildArtifact() { + // First check without lock (common case: already initialized) + if (cachedArtifact != null) { + return cachedArtifact; + } + + // Acquire lock for initialization + synchronized (artifactLock) { + // Double-check after acquiring lock + if (cachedArtifact == null) { + log.info("Building PPL grammar artifact (first request)..."); + long startTime = System.currentTimeMillis(); + + PPLAutocompleteArtifactBuilder builder = new PPLAutocompleteArtifactBuilder(); + cachedArtifact = builder.buildArtifact(); + + long elapsed = System.currentTimeMillis() - startTime; + log.info("Built PPL grammar in {}ms (hash: {})", elapsed, cachedArtifact.getGrammarHash()); + } + return cachedArtifact; + } + } + + /** + * Invalidate cached artifact (for testing or grammar updates). + * + *

Note: This only affects the current node. In a multi-node cluster, each node maintains its + * own cache. + */ + public void invalidateCache() { + synchronized (artifactLock) { + log.info("Invalidating cached PPL grammar"); + cachedArtifact = null; + } + } + + /** + * Manually serialize AutocompleteArtifact to JSON. + * + *

Outputs ATN data and arrays as standard JSON arrays. + */ + private void serializeArtifact(XContentBuilder builder, AutocompleteArtifact artifact) + throws IOException { + builder.startObject(); + + // Identity & versioning + builder.field("bundleVersion", artifact.getBundleVersion()); + builder.field("grammarHash", artifact.getGrammarHash()); + builder.field("startRuleIndex", artifact.getStartRuleIndex()); + + // Lexer ATN & metadata + if (artifact.getLexerSerializedATN() != null) { + builder.field("lexerSerializedATN", artifact.getLexerSerializedATN()); + log.debug("Lexer ATN: {} elements", artifact.getLexerSerializedATN().length); + } + + if (artifact.getLexerRuleNames() != null) { + builder.field("lexerRuleNames", artifact.getLexerRuleNames()); + } + + if (artifact.getChannelNames() != null) { + builder.field("channelNames", artifact.getChannelNames()); + } + + if (artifact.getModeNames() != null) { + builder.field("modeNames", artifact.getModeNames()); + } + + // Parser ATN & metadata + if (artifact.getParserSerializedATN() != null) { + builder.field("parserSerializedATN", artifact.getParserSerializedATN()); + log.debug("Parser ATN: {} elements", artifact.getParserSerializedATN().length); + } + + if (artifact.getParserRuleNames() != null) { + builder.field("parserRuleNames", artifact.getParserRuleNames()); + } + + // Vocabulary + if (artifact.getLiteralNames() != null) { + builder.field("literalNames", artifact.getLiteralNames()); + } + + if (artifact.getSymbolicNames() != null) { + builder.field("symbolicNames", artifact.getSymbolicNames()); + } + + builder.endObject(); + } +} diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java new file mode 100644 index 00000000000..cfcd8c9892e --- /dev/null +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -0,0 +1,260 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.plugin.rest; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; + +import java.io.ByteArrayOutputStream; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import org.junit.Before; +import org.junit.Test; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.rest.RestChannel; +import org.opensearch.rest.RestRequest; +import org.opensearch.rest.RestResponse; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.rest.FakeRestRequest; +import org.opensearch.transport.client.node.NodeClient; + +/** + * Unit tests for RestPPLGrammarAction. + * + *

Tests: + * + *

    + *
  • 200 OK response with grammar bundle + *
  • 304 Not Modified when ETag matches + *
  • Grammar structure validation + *
  • Cache behavior + *
+ */ +public class RestPPLGrammarActionTest extends OpenSearchTestCase { + + private RestPPLGrammarAction action; + private NodeClient client; + + @Before + public void setUp() throws Exception { + super.setUp(); + action = new RestPPLGrammarAction(); + client = mock(NodeClient.class); + } + + @Test + public void testName() { + assertEquals("ppl_grammar_action", action.getName()); + } + + @Test + public void testRoutes() { + assertEquals(1, action.routes().size()); + assertEquals(RestRequest.Method.GET, action.routes().get(0).getMethod()); + assertEquals("/_plugins/_ppl/_grammar", action.routes().get(0).getPath()); + } + + @Test + public void testGetGrammar_ReturnsBundle() throws Exception { + // Create request without ETag + FakeRestRequest request = + new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) + .withMethod(RestRequest.Method.GET) + .withPath("/_plugins/_ppl/_grammar") + .withHeaders(Collections.emptyMap()) + .build(); + + // Create mock channel to capture response + MockRestChannel channel = new MockRestChannel(request, true); + + // Execute request + action.prepareRequest(request, client).accept(channel); + + // Verify response + RestResponse response = channel.getResponse(); + assertNotNull("Response should not be null", response); + assertEquals("Should return 200 OK", RestStatus.OK, response.status()); + + // Verify ETag header present + String etag = response.getHeaders().get("ETag").get(0); + assertNotNull("ETag header should be present", etag); + assertTrue("ETag should start with sha256:", etag.contains("sha256:")); + + // Verify Cache-Control header + String cacheControl = response.getHeaders().get("Cache-Control").get(0); + assertNotNull("Cache-Control header should be present", cacheControl); + assertTrue("Should allow caching", cacheControl.contains("max-age=3600")); + + // Verify response content + String content = new String(response.content().array(), StandardCharsets.UTF_8); + assertTrue("Should contain bundleVersion", content.contains("\"bundleVersion\":")); + assertTrue("Should contain grammarHash", content.contains("\"grammarHash\":")); + assertTrue("Should contain lexerSerializedATN", content.contains("\"lexerSerializedATN\":")); + assertTrue("Should contain parserSerializedATN", content.contains("\"parserSerializedATN\":")); + assertTrue("Should contain literalNames", content.contains("\"literalNames\":")); + assertTrue("Should contain symbolicNames", content.contains("\"symbolicNames\":")); + } + + @Test + public void testGetGrammar_Returns304WhenETagMatches() throws Exception { + // First request to get ETag + FakeRestRequest firstRequest = + new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) + .withMethod(RestRequest.Method.GET) + .withPath("/_plugins/_ppl/_grammar") + .build(); + + MockRestChannel firstChannel = new MockRestChannel(firstRequest, true); + action.prepareRequest(firstRequest, client).accept(firstChannel); + + RestResponse firstResponse = firstChannel.getResponse(); + String etag = firstResponse.getHeaders().get("ETag").get(0); + + // Second request with matching ETag + FakeRestRequest secondRequest = + new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) + .withMethod(RestRequest.Method.GET) + .withPath("/_plugins/_ppl/_grammar") + .withHeaders(Collections.singletonMap("If-None-Match", Collections.singletonList(etag))) + .build(); + + MockRestChannel secondChannel = new MockRestChannel(secondRequest, true); + action.prepareRequest(secondRequest, client).accept(secondChannel); + + RestResponse secondResponse = secondChannel.getResponse(); + assertEquals( + "Should return 304 Not Modified when ETag matches", + RestStatus.NOT_MODIFIED, + secondResponse.status()); + } + + @Test + public void testGetGrammar_ArtifactIsCached() throws Exception { + // Make two requests + FakeRestRequest request1 = + new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) + .withMethod(RestRequest.Method.GET) + .withPath("/_plugins/_ppl/_grammar") + .build(); + + MockRestChannel channel1 = new MockRestChannel(request1, true); + long startTime1 = System.currentTimeMillis(); + action.prepareRequest(request1, client).accept(channel1); + long elapsed1 = System.currentTimeMillis() - startTime1; + + // Second request should be faster (cached) + FakeRestRequest request2 = + new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) + .withMethod(RestRequest.Method.GET) + .withPath("/_plugins/_ppl/_grammar") + .build(); + + MockRestChannel channel2 = new MockRestChannel(request2, true); + long startTime2 = System.currentTimeMillis(); + action.prepareRequest(request2, client).accept(channel2); + long elapsed2 = System.currentTimeMillis() - startTime2; + + // Both should return same ETag + String etag1 = channel1.getResponse().getHeaders().get("ETag").get(0); + String etag2 = channel2.getResponse().getHeaders().get("ETag").get(0); + assertEquals("ETags should match", etag1, etag2); + + // Second request should be faster (artifact cached) + assertTrue( + "Second request should be faster due to caching (elapsed1=" + + elapsed1 + + "ms, elapsed2=" + + elapsed2 + + "ms)", + elapsed2 < elapsed1 || elapsed2 < 50); // Allow some variance + } + + @Test + public void testInvalidateCache() throws Exception { + // Get grammar + FakeRestRequest request1 = + new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) + .withMethod(RestRequest.Method.GET) + .withPath("/_plugins/_ppl/_grammar") + .build(); + + MockRestChannel channel1 = new MockRestChannel(request1, true); + action.prepareRequest(request1, client).accept(channel1); + String etag1 = channel1.getResponse().getHeaders().get("ETag").get(0); + + // Invalidate cache + action.invalidateCache(); + + // Get grammar again + FakeRestRequest request2 = + new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) + .withMethod(RestRequest.Method.GET) + .withPath("/_plugins/_ppl/_grammar") + .build(); + + MockRestChannel channel2 = new MockRestChannel(request2, true); + action.prepareRequest(request2, client).accept(channel2); + String etag2 = channel2.getResponse().getHeaders().get("ETag").get(0); + + // ETags should still match (same grammar) + assertEquals("ETags should match after cache invalidation", etag1, etag2); + } + + /** Mock RestChannel to capture responses */ + private static class MockRestChannel implements RestChannel { + private final RestRequest request; + private final boolean detailedErrorsEnabled; + private RestResponse response; + + MockRestChannel(RestRequest request, boolean detailedErrorsEnabled) { + this.request = request; + this.detailedErrorsEnabled = detailedErrorsEnabled; + } + + @Override + public void sendResponse(RestResponse response) { + this.response = response; + } + + public RestResponse getResponse() { + return response; + } + + @Override + public RestRequest request() { + return request; + } + + @Override + public boolean detailedErrorsEnabled() { + return detailedErrorsEnabled; + } + + @Override + public org.opensearch.core.xcontent.XContentBuilder newBuilder() { + return null; + } + + @Override + public org.opensearch.core.xcontent.XContentBuilder newErrorBuilder() { + return null; + } + + @Override + public org.opensearch.core.xcontent.XContentBuilder newBuilder( + org.opensearch.core.xcontent.MediaType mediaType, boolean useFiltering) { + return null; + } + + @Override + public ByteArrayOutputStream bytesOutput() { + return null; + } + } +} diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java new file mode 100644 index 00000000000..d6e11b40848 --- /dev/null +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java @@ -0,0 +1,122 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.autocomplete; + +import lombok.extern.log4j.Log4j2; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.Vocabulary; +import org.antlr.v4.runtime.atn.ATNSerializer; +import org.opensearch.sql.executor.autocomplete.AutocompleteArtifact; +import org.opensearch.sql.executor.autocomplete.GrammarArtifactBuilder; +import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer; +import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; + +/** + * Builds autocomplete artifact bundle for PPL language. + * + *

This class: + * + *

    + *
  • Extracts ATN and vocabulary from generated PPL parser/lexer + *
  • Builds PPL-specific catalogs (commands, functions, keywords) + *
  • Creates token-to-category mapping for suggestion classification + *
  • Computes grammar hash for versioning + *
+ * + *

The resulting bundle is served via REST API and cached on frontend for client-side + * autocomplete. + */ +@Log4j2 +public class PPLAutocompleteArtifactBuilder { + + private static final String ANTLR_VERSION = "4.13.2"; + private static final String BUNDLE_VERSION = "1.0"; + private static final String RUNTIME_TARGET = "antlr4ng@3.x"; + + /** + * Build complete autocomplete artifact for PPL using optimized encoding. + * + *

This version uses: + * + *

    + *
  • Base64-encoded ATN (5-15x smaller than JSON int arrays) + *
  • Newline-packed string arrays (smaller than JSON arrays) + *
  • NO tokenTypeToCategory map (derive from naming conventions) + *
+ * + * @return AutocompleteArtifact ready for JSON serialization + */ + public AutocompleteArtifact buildArtifact() { + log.info("Building optimized PPL autocomplete artifact..."); + + // Create parser and lexer instances (with dummy input) + OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString("")); + CommonTokenStream tokens = new CommonTokenStream(lexer); + OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens); + + // Do NOT use lexer.getSerializedATN().chars().toArray() — that gives + // the raw UTF-16 char encoding which has offset values and causes + // "state type 65535 is not valid" errors in the frontend deserializer. + int[] lexerATNArray = new ATNSerializer(lexer.getATN()).serialize().toArray(); + int[] parserATNArray = new ATNSerializer(parser.getATN()).serialize().toArray(); + + log.info("ATN data ready:"); + log.info(" Lexer ATN: {} elements", lexerATNArray.length); + log.info(" Parser ATN: {} elements", parserATNArray.length); + + // Extract vocabulary + Vocabulary vocabulary = parser.getVocabulary(); + String[] literalNames = GrammarArtifactBuilder.extractLiteralNames(vocabulary); + String[] symbolicNames = GrammarArtifactBuilder.extractSymbolicNames(vocabulary); + + // Extract rule names + String[] parserRuleNames = GrammarArtifactBuilder.extractParserRuleNames(parser); + String[] lexerRuleNames = GrammarArtifactBuilder.extractLexerRuleNames(lexer); + + // Extract channel/mode names from lexer + String[] channelNames = GrammarArtifactBuilder.extractChannelNames(lexer); + String[] modeNames = GrammarArtifactBuilder.extractModeNames(lexer); + + // Compute grammar hash from ATN data (always available, no classpath dependency) + String grammarHash = + GrammarArtifactBuilder.computeGrammarHash(lexerATNArray, parserATNArray, ANTLR_VERSION); + + // Build minimal artifact with only essential fields + AutocompleteArtifact artifact = + AutocompleteArtifact.builder() + .bundleVersion(BUNDLE_VERSION) + .grammarHash(grammarHash) + // ATN data (required for interpreters) + .lexerSerializedATN(lexerATNArray) + .parserSerializedATN(parserATNArray) + // Rule/channel/mode names (required for interpreters) + .lexerRuleNames(lexerRuleNames) + .parserRuleNames(parserRuleNames) + .channelNames(channelNames) + .modeNames(modeNames) + .startRuleIndex(0) // "root" rule + // Vocabulary (required for token display) + .literalNames(literalNames) + .symbolicNames(symbolicNames) + // NO tokenTypeToCategory - removed for size (derive from conventions) + .tokenTypeToCategory(null) + .build(); + + log.info( + "Built optimized PPL artifact: {} tokens, lexer ATN: {} elements, parser ATN: {} elements", + symbolicNames.length, + lexerATNArray.length, + parserATNArray.length); + log.info( + "ATN transfer size: ~{}KB (JSON int[]), ~{}KB (gzipped)", + (lexerATNArray.length + parserATNArray.length) * 4 / 1024, + (lexerATNArray.length + parserATNArray.length) * 2 / 1024); + + return artifact; + } + +} From ecb07e44849c8637abbfd946fa8b33a330625508 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Thu, 19 Feb 2026 10:38:33 -0800 Subject: [PATCH 02/20] remove unused etag Signed-off-by: Eric Wei --- .../sql/plugin/rest/RestPPLGrammarAction.java | 52 --------------- .../plugin/rest/RestPPLGrammarActionTest.java | 66 +------------------ 2 files changed, 3 insertions(+), 115 deletions(-) diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index c8908cd7228..c34e37ead00 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -34,31 +34,6 @@ *
  • Channel and mode names * * - *

    The bundle is cached in memory and versioned via ETag (grammar hash). Clients should cache - * locally and only refetch when ETag changes. - * - *

    Example request: - * - *

    - * GET /_plugins/_ppl/_grammar
    - * If-None-Match: "sha256:abc123..."
    - * 
    - * - *

    Response headers: - * - *

    - * HTTP/1.1 200 OK
    - * ETag: "sha256:abc123..."
    - * Cache-Control: public, max-age=3600
    - * Content-Type: application/json
    - * 
    - * - *

    Or if client has latest: - * - *

    - * HTTP/1.1 304 Not Modified
    - * ETag: "sha256:abc123..."
    - * 
    */ @Log4j2 public class RestPPLGrammarAction extends BaseRestHandler { @@ -90,37 +65,10 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli // Get or build artifact (lazy initialization) AutocompleteArtifact artifact = getOrBuildArtifact(); - String grammarHash = artifact.getGrammarHash(); - String ifNoneMatch = request.header("If-None-Match"); - - // Strip quotes from ETag if present ("sha256:..." → sha256:...) - if (ifNoneMatch != null && ifNoneMatch.startsWith("\"") && ifNoneMatch.endsWith("\"")) { - ifNoneMatch = ifNoneMatch.substring(1, ifNoneMatch.length() - 1); - } - - // Return 304 Not Modified if client has latest version - if (grammarHash.equals(ifNoneMatch)) { - log.debug("Client has latest grammar (ETag match), returning 304"); - BytesRestResponse response = - new BytesRestResponse(RestStatus.NOT_MODIFIED, "application/json", ""); - response.addHeader("ETag", "\"" + grammarHash + "\""); - response.addHeader("Cache-Control", "public, max-age=3600"); - channel.sendResponse(response); - return; - } - - // Serialize artifact to JSON - log.debug("Serializing grammar to JSON (hash: {})", grammarHash); XContentBuilder builder = channel.newBuilder(); serializeArtifact(builder, artifact); BytesRestResponse response = new BytesRestResponse(RestStatus.OK, builder); - - // Add caching headers - response.addHeader("ETag", "\"" + grammarHash + "\""); - response.addHeader("Cache-Control", "public, max-age=3600"); - response.addHeader("Content-Type", "application/json; charset=UTF-8"); - log.info("Returning PPL grammar (size: {} bytes)", response.content().length()); channel.sendResponse(response); diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index cfcd8c9892e..b0f023b1923 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -12,7 +12,6 @@ import java.io.ByteArrayOutputStream; import java.nio.charset.StandardCharsets; -import java.util.Collections; import org.junit.Before; import org.junit.Test; import org.opensearch.core.rest.RestStatus; @@ -31,7 +30,6 @@ * *
      *
    • 200 OK response with grammar bundle - *
    • 304 Not Modified when ETag matches *
    • Grammar structure validation *
    • Cache behavior *
    @@ -62,36 +60,19 @@ public void testRoutes() { @Test public void testGetGrammar_ReturnsBundle() throws Exception { - // Create request without ETag FakeRestRequest request = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) .withMethod(RestRequest.Method.GET) .withPath("/_plugins/_ppl/_grammar") - .withHeaders(Collections.emptyMap()) .build(); - // Create mock channel to capture response MockRestChannel channel = new MockRestChannel(request, true); - - // Execute request action.prepareRequest(request, client).accept(channel); - // Verify response RestResponse response = channel.getResponse(); assertNotNull("Response should not be null", response); assertEquals("Should return 200 OK", RestStatus.OK, response.status()); - // Verify ETag header present - String etag = response.getHeaders().get("ETag").get(0); - assertNotNull("ETag header should be present", etag); - assertTrue("ETag should start with sha256:", etag.contains("sha256:")); - - // Verify Cache-Control header - String cacheControl = response.getHeaders().get("Cache-Control").get(0); - assertNotNull("Cache-Control header should be present", cacheControl); - assertTrue("Should allow caching", cacheControl.contains("max-age=3600")); - - // Verify response content String content = new String(response.content().array(), StandardCharsets.UTF_8); assertTrue("Should contain bundleVersion", content.contains("\"bundleVersion\":")); assertTrue("Should contain grammarHash", content.contains("\"grammarHash\":")); @@ -101,39 +82,6 @@ public void testGetGrammar_ReturnsBundle() throws Exception { assertTrue("Should contain symbolicNames", content.contains("\"symbolicNames\":")); } - @Test - public void testGetGrammar_Returns304WhenETagMatches() throws Exception { - // First request to get ETag - FakeRestRequest firstRequest = - new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) - .withMethod(RestRequest.Method.GET) - .withPath("/_plugins/_ppl/_grammar") - .build(); - - MockRestChannel firstChannel = new MockRestChannel(firstRequest, true); - action.prepareRequest(firstRequest, client).accept(firstChannel); - - RestResponse firstResponse = firstChannel.getResponse(); - String etag = firstResponse.getHeaders().get("ETag").get(0); - - // Second request with matching ETag - FakeRestRequest secondRequest = - new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) - .withMethod(RestRequest.Method.GET) - .withPath("/_plugins/_ppl/_grammar") - .withHeaders(Collections.singletonMap("If-None-Match", Collections.singletonList(etag))) - .build(); - - MockRestChannel secondChannel = new MockRestChannel(secondRequest, true); - action.prepareRequest(secondRequest, client).accept(secondChannel); - - RestResponse secondResponse = secondChannel.getResponse(); - assertEquals( - "Should return 304 Not Modified when ETag matches", - RestStatus.NOT_MODIFIED, - secondResponse.status()); - } - @Test public void testGetGrammar_ArtifactIsCached() throws Exception { // Make two requests @@ -160,11 +108,6 @@ public void testGetGrammar_ArtifactIsCached() throws Exception { action.prepareRequest(request2, client).accept(channel2); long elapsed2 = System.currentTimeMillis() - startTime2; - // Both should return same ETag - String etag1 = channel1.getResponse().getHeaders().get("ETag").get(0); - String etag2 = channel2.getResponse().getHeaders().get("ETag").get(0); - assertEquals("ETags should match", etag1, etag2); - // Second request should be faster (artifact cached) assertTrue( "Second request should be faster due to caching (elapsed1=" @@ -186,12 +129,12 @@ public void testInvalidateCache() throws Exception { MockRestChannel channel1 = new MockRestChannel(request1, true); action.prepareRequest(request1, client).accept(channel1); - String etag1 = channel1.getResponse().getHeaders().get("ETag").get(0); + assertEquals(RestStatus.OK, channel1.getResponse().status()); // Invalidate cache action.invalidateCache(); - // Get grammar again + // Get grammar again — should still return 200 OK with same content FakeRestRequest request2 = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) .withMethod(RestRequest.Method.GET) @@ -200,10 +143,7 @@ public void testInvalidateCache() throws Exception { MockRestChannel channel2 = new MockRestChannel(request2, true); action.prepareRequest(request2, client).accept(channel2); - String etag2 = channel2.getResponse().getHeaders().get("ETag").get(0); - - // ETags should still match (same grammar) - assertEquals("ETags should match after cache invalidation", etag1, etag2); + assertEquals(RestStatus.OK, channel2.getResponse().status()); } /** Mock RestChannel to capture responses */ From 380945bdce2a275dd9c12177d8f1bb44285057f2 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Thu, 19 Feb 2026 10:56:20 -0800 Subject: [PATCH 03/20] cleanup on unused code Signed-off-by: Eric Wei --- .../autocomplete/AutocompleteArtifact.java | 121 ----------- .../autocomplete/GrammarArtifactBuilder.java | 202 ------------------ .../executor/autocomplete/GrammarBundle.java | 53 +++++ .../sql/plugin/rest/RestPPLGrammarAction.java | 93 ++++---- .../plugin/rest/RestPPLGrammarActionTest.java | 2 +- .../PPLAutocompleteArtifactBuilder.java | 122 ----------- .../autocomplete/PPLGrammarBundleBuilder.java | 98 +++++++++ 7 files changed, 197 insertions(+), 494 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java delete mode 100644 core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java create mode 100644 core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java delete mode 100644 ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java create mode 100644 ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java deleted file mode 100644 index 38b87765cbd..00000000000 --- a/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.executor.autocomplete; - -import java.util.Map; -import lombok.Builder; -import lombok.Data; - -/** - * Autocomplete artifact bundle containing everything needed for client-side grammar-based - * autocomplete. - * - *

    This bundle is language-agnostic and can be used for PPL, SQL, or any ANTLR-based language. It - * contains: - * - *

      - *
    • Serialized ATN data for lexer and parser (for antlr4ng runtime) - *
    • Vocabulary and rule names (for token/rule interpretation) - *
    • Static catalogs (commands, functions, keywords, snippets) - *
    • Token classification mapping (for suggestion categorization) - *
    - * - *

    Frontend uses this bundle to: - * - *

      - *
    1. Deserialize ATNs with antlr4ng - *
    2. Create LexerInterpreter and ParserInterpreter - *
    3. Use antlr4-c3 to find valid tokens at cursor - *
    4. Generate suggestions from catalogs - *
    - */ -@Data -@Builder -public class AutocompleteArtifact { - - // ============================================================================ - // Identity & versioning - // ============================================================================ - - /** Bundle version (increment when format changes) */ - private String bundleVersion; - - /** - * Hash of grammar sources + ANTLR version. Used for cache validation via ETag. Format: - * "sha256:abc123..." - */ - private String grammarHash; - - // ============================================================================ - // Lexer ATN & metadata - // ============================================================================ - - /** - * Serialized lexer ATN as int array. Frontend uses directly: new - * ATNDeserializer().deserialize(lexerSerializedATN) - */ - private int[] lexerSerializedATN; - - /** Lexer rule names (e.g., ["SEARCH", "WHERE", "PIPE", ...]) */ - private String[] lexerRuleNames; - - /** Channel names (e.g., ["DEFAULT_TOKEN_CHANNEL", "WHITESPACE", "ERRORCHANNEL"]) */ - private String[] channelNames; - - /** Mode names (e.g., ["DEFAULT_MODE"]) */ - private String[] modeNames; - - // ============================================================================ - // Parser ATN & metadata - // ============================================================================ - - /** - * Serialized parser ATN as int array. Frontend uses directly: new - * ATNDeserializer().deserialize(parserSerializedATN) - */ - private int[] parserSerializedATN; - - /** Parser rule names (e.g., ["root", "pplStatement", "commands", ...]) */ - private String[] parserRuleNames; - - /** Start rule index (usually 0 for "root" rule) */ - private int startRuleIndex; - - // ============================================================================ - // Vocabulary - // ============================================================================ - - /** - * Literal names from vocabulary. Index = token type. Values are literal tokens with quotes, or - * null. Example: ["", "'search'", "'where'", "'|'", null, null, ...] - */ - private String[] literalNames; - - /** - * Symbolic names from vocabulary. Index = token type. Values are token symbolic names, or null. - * Example: ["", "SEARCH", "WHERE", "PIPE", "ID", "INTEGER", ...] - */ - private String[] symbolicNames; - - /** - * Optional display names (user-friendly token names). If not provided, frontend uses literal or - * symbolic names. - */ - private String[] displayNames; - - // ============================================================================ - // Token classification - // ============================================================================ - - /** - * Mapping from token symbolic name to suggestion category. Used by frontend to classify antlr4-c3 - * token candidates into suggestion types. - * - *

    Example: { "SEARCH": "COMMAND", "WHERE": "COMMAND", "BY": "KEYWORD", "COUNT": "FUNCTION", - * "AND": "OPERATOR" } - */ - private Map tokenTypeToCategory; -} diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java deleted file mode 100644 index 5bbffea1e9e..00000000000 --- a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.executor.autocomplete; - -import java.nio.charset.StandardCharsets; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import lombok.extern.log4j.Log4j2; -import org.antlr.v4.runtime.Lexer; -import org.antlr.v4.runtime.Parser; -import org.antlr.v4.runtime.Vocabulary; - -/** - * Utility class for extracting ANTLR grammar artifacts (ATN, vocabulary, rule names) from generated - * parser/lexer classes. - * - *

    This class handles the low-level details of: - * - *

      - *
    • Converting ANTLR's Java String ATN format to int[] for JSON transfer - *
    • Extracting vocabulary (literal and symbolic names) - *
    • Extracting rule names via public ANTLR APIs - *
    • Computing grammar hash for versioning - *
    - * - *

    Language-specific builders (PPL, SQL) use this class to build their autocomplete bundles. - */ -@Log4j2 -public class GrammarArtifactBuilder { - - /** - * Extract literal names from vocabulary. - * - *

    Returns array where index = token type, value = literal token (with quotes) or null. - * - *

    Example: ["", "'search'", "'where'", "'|'", null, ...] - * - * @param vocabulary Parser vocabulary - * @return Array of literal names - */ - public static String[] extractLiteralNames(Vocabulary vocabulary) { - int maxTokenType = vocabulary.getMaxTokenType(); - String[] names = new String[maxTokenType + 1]; - - for (int i = 0; i <= maxTokenType; i++) { - String literal = vocabulary.getLiteralName(i); - // Keep nulls as nulls (no literal representation) - names[i] = literal; - } - - log.debug("Extracted {} literal names", names.length); - return names; - } - - /** - * Extract symbolic names from vocabulary. - * - *

    Returns array where index = token type, value = symbolic token name or null. - * - *

    Example: ["", "SEARCH", "WHERE", "PIPE", "ID", ...] - * - * @param vocabulary Parser vocabulary - * @return Array of symbolic names - */ - public static String[] extractSymbolicNames(Vocabulary vocabulary) { - int maxTokenType = vocabulary.getMaxTokenType(); - String[] names = new String[maxTokenType + 1]; - - for (int i = 0; i <= maxTokenType; i++) { - String symbolic = vocabulary.getSymbolicName(i); - // Keep nulls as nulls (no symbolic name) - names[i] = symbolic; - } - - log.debug("Extracted {} symbolic names", names.length); - return names; - } - - /** - * Extract rule names from parser. - * - *

    Parser.getRuleNames() is public API. - * - * @param parser Parser instance - * @return Array of rule names - */ - public static String[] extractParserRuleNames(Parser parser) { - String[] ruleNames = parser.getRuleNames(); - log.debug("Extracted {} parser rule names", ruleNames.length); - return ruleNames; - } - - /** - * Extract rule names from lexer. - * - *

    Lexer.getRuleNames() is public API (Lexer extends Recognizer). - * - * @param lexer Lexer instance - * @return Array of lexer rule names - */ - public static String[] extractLexerRuleNames(Lexer lexer) { - String[] ruleNames = lexer.getRuleNames(); - log.debug("Extracted {} lexer rule names", ruleNames.length); - return ruleNames; - } - - /** - * Extract channel names from lexer. - * - *

    ANTLR 4.x exposes channel names via getChannelNames() method in generated lexers. - * This method dynamically extracts the actual channel names from the lexer instance. - * - * @param lexer Lexer instance - * @return Array of channel names - */ - public static String[] extractChannelNames(Lexer lexer) { - String[] channelNames = lexer.getChannelNames(); - log.debug("Extracted {} channel names from lexer", channelNames.length); - return channelNames; - } - - /** - * Extract mode names from lexer. - * - *

    ANTLR 4.x exposes mode names via getModeNames() method in generated lexers. - * This method dynamically extracts the actual mode names from the lexer instance. - * - * @param lexer Lexer instance - * @return Array of mode names - */ - public static String[] extractModeNames(Lexer lexer) { - String[] modeNames = lexer.getModeNames(); - log.debug("Extracted {} mode names from lexer", modeNames.length); - return modeNames; - } - - /** - * Compute grammar hash from ATN data (recommended). - * - *

    This method hashes the serialized ATN arrays directly, which: - * - *

      - *
    • Always available at runtime (no classpath dependencies) - *
    • Reflects the actual artifact being served - *
    • Changes when grammar changes (ATN structure changes) - *
    - * - * @param lexerATN Serialized lexer ATN as int array - * @param parserATN Serialized parser ATN as int array - * @param antlrVersion ANTLR tool version (e.g., "4.13.2") - * @return Hash string in format "sha256:abc123..." - */ - public static String computeGrammarHash(int[] lexerATN, int[] parserATN, String antlrVersion) { - try { - MessageDigest digest = MessageDigest.getInstance("SHA-256"); - - // Hash lexer ATN data - for (int value : lexerATN) { - digest.update((byte) (value >> 8)); - digest.update((byte) value); - } - - // Hash parser ATN data - for (int value : parserATN) { - digest.update((byte) (value >> 8)); - digest.update((byte) value); - } - - // Hash ANTLR version to detect generator changes - digest.update(antlrVersion.getBytes(StandardCharsets.UTF_8)); - - // Compute hash - byte[] hashBytes = digest.digest(); - String result = "sha256:" + bytesToHex(hashBytes); - - log.info("Computed grammar hash from ATN data: {}", result); - return result; - - } catch (NoSuchAlgorithmException e) { - // SHA-256 is required by Java specification, this should never happen - throw new IllegalStateException("SHA-256 algorithm not available", e); - } - } - - - /** - * Convert byte array to hex string. - * - * @param bytes Input bytes - * @return Hex string (lowercase) - */ - private static String bytesToHex(byte[] bytes) { - StringBuilder sb = new StringBuilder(bytes.length * 2); - for (byte b : bytes) { - sb.append(String.format("%02x", b & 0xFF)); - } - return sb.toString(); - } -} diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java new file mode 100644 index 00000000000..6e4a456a22e --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java @@ -0,0 +1,53 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.executor.autocomplete; + +import lombok.Builder; +import lombok.Data; + +/** + * Grammar bundle containing everything needed for client-side ANTLR-based parsing. + * + *

    Includes serialized ATN data (lexer + parser), vocabulary, and rule/channel/mode names. + * Language-agnostic — usable for PPL, SQL, or any ANTLR4 grammar. + */ +@Data +@Builder +public class GrammarBundle { + + /** Bundle format version. Increment when the schema changes. */ + private String bundleVersion; + + /** SHA-256 hash of the serialized ATN data. Clients may use this to detect grammar changes. */ + private String grammarHash; + + /** Serialized lexer ATN as int array (ATNSerializer output). */ + private int[] lexerSerializedATN; + + /** Lexer rule names. */ + private String[] lexerRuleNames; + + /** Channel names (e.g. DEFAULT_TOKEN_CHANNEL, HIDDEN). */ + private String[] channelNames; + + /** Mode names (e.g. DEFAULT_MODE). */ + private String[] modeNames; + + /** Serialized parser ATN as int array (ATNSerializer output). */ + private int[] parserSerializedATN; + + /** Parser rule names. */ + private String[] parserRuleNames; + + /** Start rule index (0 = root rule). */ + private int startRuleIndex; + + /** Literal token names indexed by token type (e.g. "'search'", "'|'"). */ + private String[] literalNames; + + /** Symbolic token names indexed by token type (e.g. "SEARCH", "PIPE"). */ + private String[] symbolicNames; +} diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index c34e37ead00..0673dd7d467 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -16,8 +16,8 @@ import org.opensearch.rest.BaseRestHandler; import org.opensearch.rest.BytesRestResponse; import org.opensearch.rest.RestRequest; -import org.opensearch.sql.executor.autocomplete.AutocompleteArtifact; -import org.opensearch.sql.ppl.autocomplete.PPLAutocompleteArtifactBuilder; +import org.opensearch.sql.executor.autocomplete.GrammarBundle; +import org.opensearch.sql.ppl.autocomplete.PPLGrammarBundleBuilder; import org.opensearch.transport.client.node.NodeClient; /** @@ -40,9 +40,9 @@ public class RestPPLGrammarAction extends BaseRestHandler { private static final String ENDPOINT_PATH = "/_plugins/_ppl/_grammar"; - // Lazy-initialized singleton artifact (built once per JVM lifecycle) - private volatile AutocompleteArtifact cachedArtifact; - private final Object artifactLock = new Object(); + // Lazy-initialized singleton bundle (built once per JVM lifecycle) + private volatile GrammarBundle cachedBundle; + private final Object bundleLock = new Object(); @Override public String getName() { @@ -62,11 +62,10 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli return channel -> { try { - // Get or build artifact (lazy initialization) - AutocompleteArtifact artifact = getOrBuildArtifact(); + GrammarBundle bundle = getOrBuildBundle(); XContentBuilder builder = channel.newBuilder(); - serializeArtifact(builder, artifact); + serializeBundle(builder, bundle); BytesRestResponse response = new BytesRestResponse(RestStatus.OK, builder); log.info("Returning PPL grammar (size: {} bytes)", response.content().length()); @@ -80,95 +79,93 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli } /** - * Get cached artifact or build it if not initialized. + * Get cached bundle or build it if not yet initialized. * *

    Thread-safe lazy initialization with double-checked locking. */ - private AutocompleteArtifact getOrBuildArtifact() { + private GrammarBundle getOrBuildBundle() { // First check without lock (common case: already initialized) - if (cachedArtifact != null) { - return cachedArtifact; + if (cachedBundle != null) { + return cachedBundle; } // Acquire lock for initialization - synchronized (artifactLock) { + synchronized (bundleLock) { // Double-check after acquiring lock - if (cachedArtifact == null) { - log.info("Building PPL grammar artifact (first request)..."); + if (cachedBundle == null) { + log.info("Building PPL grammar bundle (first request)..."); long startTime = System.currentTimeMillis(); - PPLAutocompleteArtifactBuilder builder = new PPLAutocompleteArtifactBuilder(); - cachedArtifact = builder.buildArtifact(); + PPLGrammarBundleBuilder builder = new PPLGrammarBundleBuilder(); + cachedBundle = builder.build(); long elapsed = System.currentTimeMillis() - startTime; - log.info("Built PPL grammar in {}ms (hash: {})", elapsed, cachedArtifact.getGrammarHash()); + log.info("Built PPL grammar in {}ms (hash: {})", elapsed, cachedBundle.getGrammarHash()); } - return cachedArtifact; + return cachedBundle; } } /** - * Invalidate cached artifact (for testing or grammar updates). + * Invalidate cached bundle (for testing or grammar updates). * *

    Note: This only affects the current node. In a multi-node cluster, each node maintains its * own cache. */ public void invalidateCache() { - synchronized (artifactLock) { - log.info("Invalidating cached PPL grammar"); - cachedArtifact = null; + synchronized (bundleLock) { + log.info("Invalidating cached PPL grammar bundle"); + cachedBundle = null; } } /** - * Manually serialize AutocompleteArtifact to JSON. - * - *

    Outputs ATN data and arrays as standard JSON arrays. + * Serialize {@link GrammarBundle} to JSON. */ - private void serializeArtifact(XContentBuilder builder, AutocompleteArtifact artifact) + private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) throws IOException { builder.startObject(); // Identity & versioning - builder.field("bundleVersion", artifact.getBundleVersion()); - builder.field("grammarHash", artifact.getGrammarHash()); - builder.field("startRuleIndex", artifact.getStartRuleIndex()); + builder.field("bundleVersion", bundle.getBundleVersion()); + builder.field("grammarHash", bundle.getGrammarHash()); + builder.field("startRuleIndex", bundle.getStartRuleIndex()); // Lexer ATN & metadata - if (artifact.getLexerSerializedATN() != null) { - builder.field("lexerSerializedATN", artifact.getLexerSerializedATN()); - log.debug("Lexer ATN: {} elements", artifact.getLexerSerializedATN().length); + if (bundle.getLexerSerializedATN() != null) { + builder.field("lexerSerializedATN", bundle.getLexerSerializedATN()); + log.debug("Lexer ATN: {} elements", bundle.getLexerSerializedATN().length); } - if (artifact.getLexerRuleNames() != null) { - builder.field("lexerRuleNames", artifact.getLexerRuleNames()); + if (bundle.getLexerRuleNames() != null) { + builder.field("lexerRuleNames", bundle.getLexerRuleNames()); } - if (artifact.getChannelNames() != null) { - builder.field("channelNames", artifact.getChannelNames()); + if (bundle.getChannelNames() != null) { + builder.field("channelNames", bundle.getChannelNames()); } - if (artifact.getModeNames() != null) { - builder.field("modeNames", artifact.getModeNames()); + if (bundle.getModeNames() != null) { + builder.field("modeNames", bundle.getModeNames()); } // Parser ATN & metadata - if (artifact.getParserSerializedATN() != null) { - builder.field("parserSerializedATN", artifact.getParserSerializedATN()); - log.debug("Parser ATN: {} elements", artifact.getParserSerializedATN().length); + if (bundle.getParserSerializedATN() != null) { + builder.field("parserSerializedATN", bundle.getParserSerializedATN()); + log.debug("Parser ATN: {} elements", bundle.getParserSerializedATN().length); } - if (artifact.getParserRuleNames() != null) { - builder.field("parserRuleNames", artifact.getParserRuleNames()); + if (bundle.getParserRuleNames() != null) { + builder.field("parserRuleNames", bundle.getParserRuleNames()); } // Vocabulary - if (artifact.getLiteralNames() != null) { - builder.field("literalNames", artifact.getLiteralNames()); + if (bundle.getLiteralNames() != null) { + builder.field("literalNames", bundle.getLiteralNames()); } - if (artifact.getSymbolicNames() != null) { - builder.field("symbolicNames", artifact.getSymbolicNames()); + if (bundle.getSymbolicNames() != null) { + builder.field("symbolicNames", bundle.getSymbolicNames()); } builder.endObject(); diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index b0f023b1923..6f7ed82dc75 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -108,7 +108,7 @@ public void testGetGrammar_ArtifactIsCached() throws Exception { action.prepareRequest(request2, client).accept(channel2); long elapsed2 = System.currentTimeMillis() - startTime2; - // Second request should be faster (artifact cached) + // Second request should be faster (bundle cached) assertTrue( "Second request should be faster due to caching (elapsed1=" + elapsed1 diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java deleted file mode 100644 index d6e11b40848..00000000000 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.ppl.autocomplete; - -import lombok.extern.log4j.Log4j2; -import org.antlr.v4.runtime.CharStreams; -import org.antlr.v4.runtime.CommonTokenStream; -import org.antlr.v4.runtime.Vocabulary; -import org.antlr.v4.runtime.atn.ATNSerializer; -import org.opensearch.sql.executor.autocomplete.AutocompleteArtifact; -import org.opensearch.sql.executor.autocomplete.GrammarArtifactBuilder; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; - -/** - * Builds autocomplete artifact bundle for PPL language. - * - *

    This class: - * - *

      - *
    • Extracts ATN and vocabulary from generated PPL parser/lexer - *
    • Builds PPL-specific catalogs (commands, functions, keywords) - *
    • Creates token-to-category mapping for suggestion classification - *
    • Computes grammar hash for versioning - *
    - * - *

    The resulting bundle is served via REST API and cached on frontend for client-side - * autocomplete. - */ -@Log4j2 -public class PPLAutocompleteArtifactBuilder { - - private static final String ANTLR_VERSION = "4.13.2"; - private static final String BUNDLE_VERSION = "1.0"; - private static final String RUNTIME_TARGET = "antlr4ng@3.x"; - - /** - * Build complete autocomplete artifact for PPL using optimized encoding. - * - *

    This version uses: - * - *

      - *
    • Base64-encoded ATN (5-15x smaller than JSON int arrays) - *
    • Newline-packed string arrays (smaller than JSON arrays) - *
    • NO tokenTypeToCategory map (derive from naming conventions) - *
    - * - * @return AutocompleteArtifact ready for JSON serialization - */ - public AutocompleteArtifact buildArtifact() { - log.info("Building optimized PPL autocomplete artifact..."); - - // Create parser and lexer instances (with dummy input) - OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString("")); - CommonTokenStream tokens = new CommonTokenStream(lexer); - OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens); - - // Do NOT use lexer.getSerializedATN().chars().toArray() — that gives - // the raw UTF-16 char encoding which has offset values and causes - // "state type 65535 is not valid" errors in the frontend deserializer. - int[] lexerATNArray = new ATNSerializer(lexer.getATN()).serialize().toArray(); - int[] parserATNArray = new ATNSerializer(parser.getATN()).serialize().toArray(); - - log.info("ATN data ready:"); - log.info(" Lexer ATN: {} elements", lexerATNArray.length); - log.info(" Parser ATN: {} elements", parserATNArray.length); - - // Extract vocabulary - Vocabulary vocabulary = parser.getVocabulary(); - String[] literalNames = GrammarArtifactBuilder.extractLiteralNames(vocabulary); - String[] symbolicNames = GrammarArtifactBuilder.extractSymbolicNames(vocabulary); - - // Extract rule names - String[] parserRuleNames = GrammarArtifactBuilder.extractParserRuleNames(parser); - String[] lexerRuleNames = GrammarArtifactBuilder.extractLexerRuleNames(lexer); - - // Extract channel/mode names from lexer - String[] channelNames = GrammarArtifactBuilder.extractChannelNames(lexer); - String[] modeNames = GrammarArtifactBuilder.extractModeNames(lexer); - - // Compute grammar hash from ATN data (always available, no classpath dependency) - String grammarHash = - GrammarArtifactBuilder.computeGrammarHash(lexerATNArray, parserATNArray, ANTLR_VERSION); - - // Build minimal artifact with only essential fields - AutocompleteArtifact artifact = - AutocompleteArtifact.builder() - .bundleVersion(BUNDLE_VERSION) - .grammarHash(grammarHash) - // ATN data (required for interpreters) - .lexerSerializedATN(lexerATNArray) - .parserSerializedATN(parserATNArray) - // Rule/channel/mode names (required for interpreters) - .lexerRuleNames(lexerRuleNames) - .parserRuleNames(parserRuleNames) - .channelNames(channelNames) - .modeNames(modeNames) - .startRuleIndex(0) // "root" rule - // Vocabulary (required for token display) - .literalNames(literalNames) - .symbolicNames(symbolicNames) - // NO tokenTypeToCategory - removed for size (derive from conventions) - .tokenTypeToCategory(null) - .build(); - - log.info( - "Built optimized PPL artifact: {} tokens, lexer ATN: {} elements, parser ATN: {} elements", - symbolicNames.length, - lexerATNArray.length, - parserATNArray.length); - log.info( - "ATN transfer size: ~{}KB (JSON int[]), ~{}KB (gzipped)", - (lexerATNArray.length + parserATNArray.length) * 4 / 1024, - (lexerATNArray.length + parserATNArray.length) * 2 / 1024); - - return artifact; - } - -} diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java new file mode 100644 index 00000000000..ce73f0d3b5f --- /dev/null +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -0,0 +1,98 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.autocomplete; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import lombok.extern.log4j.Log4j2; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.Vocabulary; +import org.antlr.v4.runtime.atn.ATNSerializer; +import org.opensearch.sql.executor.autocomplete.GrammarBundle; +import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer; +import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; + +/** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */ +@Log4j2 +public class PPLGrammarBundleBuilder { + + private static final String ANTLR_VERSION = "4.13.2"; + private static final String BUNDLE_VERSION = "1.0"; + + /** + * Build the PPL grammar bundle. + * + *

    Uses {@link ATNSerializer} to re-serialize the ATN into the int[] format expected by the + * antlr4ng runtime on the frontend. Do NOT use {@code lexer.getSerializedATN().chars().toArray()} + * — that yields raw UTF-16 char values which cause "state type 65535 is not valid" errors. + * + * @return {@link GrammarBundle} ready for JSON serialization + */ + public GrammarBundle build() { + log.info("Building PPL grammar bundle..."); + + OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString("")); + CommonTokenStream tokens = new CommonTokenStream(lexer); + OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens); + + int[] lexerATN = new ATNSerializer(lexer.getATN()).serialize().toArray(); + int[] parserATN = new ATNSerializer(parser.getATN()).serialize().toArray(); + + log.info("Lexer ATN: {} elements, Parser ATN: {} elements", lexerATN.length, parserATN.length); + + Vocabulary vocabulary = parser.getVocabulary(); + int maxTokenType = vocabulary.getMaxTokenType(); + String[] literalNames = new String[maxTokenType + 1]; + String[] symbolicNames = new String[maxTokenType + 1]; + for (int i = 0; i <= maxTokenType; i++) { + literalNames[i] = vocabulary.getLiteralName(i); + symbolicNames[i] = vocabulary.getSymbolicName(i); + } + + GrammarBundle bundle = + GrammarBundle.builder() + .bundleVersion(BUNDLE_VERSION) + .grammarHash(computeGrammarHash(lexerATN, parserATN)) + .lexerSerializedATN(lexerATN) + .parserSerializedATN(parserATN) + .lexerRuleNames(lexer.getRuleNames()) + .parserRuleNames(parser.getRuleNames()) + .channelNames(lexer.getChannelNames()) + .modeNames(lexer.getModeNames()) + .startRuleIndex(0) + .literalNames(literalNames) + .symbolicNames(symbolicNames) + .build(); + + log.info("Built PPL grammar bundle: {} tokens", symbolicNames.length); + return bundle; + } + + private static String computeGrammarHash(int[] lexerATN, int[] parserATN) { + try { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + for (int v : lexerATN) { + digest.update((byte) (v >> 8)); + digest.update((byte) v); + } + for (int v : parserATN) { + digest.update((byte) (v >> 8)); + digest.update((byte) v); + } + digest.update(ANTLR_VERSION.getBytes(StandardCharsets.UTF_8)); + byte[] hash = digest.digest(); + StringBuilder sb = new StringBuilder("sha256:"); + for (byte b : hash) { + sb.append(String.format("%02x", b & 0xFF)); + } + return sb.toString(); + } catch (NoSuchAlgorithmException e) { + throw new IllegalStateException("SHA-256 not available", e); + } + } +} From b9f351351966a06cb03215e349a3b324a760506e Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Thu, 19 Feb 2026 11:05:30 -0800 Subject: [PATCH 04/20] cleanup on comments and debug logging Signed-off-by: Eric Wei --- .../executor/autocomplete/GrammarBundle.java | 9 +--- .../sql/plugin/rest/RestPPLGrammarAction.java | 45 ++--------------- .../plugin/rest/RestPPLGrammarActionTest.java | 20 +------- .../autocomplete/PPLGrammarBundleBuilder.java | 48 +++++++------------ 4 files changed, 25 insertions(+), 97 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java index 6e4a456a22e..7b3720881aa 100644 --- a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java +++ b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java @@ -8,17 +8,12 @@ import lombok.Builder; import lombok.Data; -/** - * Grammar bundle containing everything needed for client-side ANTLR-based parsing. - * - *

    Includes serialized ATN data (lexer + parser), vocabulary, and rule/channel/mode names. - * Language-agnostic — usable for PPL, SQL, or any ANTLR4 grammar. - */ +/** Response payload for the {@code GET /_plugins/_ppl/_grammar} endpoint. */ @Data @Builder public class GrammarBundle { - /** Bundle format version. Increment when the schema changes. */ + /** Bundle format version. */ private String bundleVersion; /** SHA-256 hash of the serialized ATN data. Clients may use this to detect grammar changes. */ diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index 0673dd7d467..5cc8c72a631 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -20,21 +20,7 @@ import org.opensearch.sql.ppl.autocomplete.PPLGrammarBundleBuilder; import org.opensearch.transport.client.node.NodeClient; -/** - * REST handler for PPL grammar metadata endpoint. - * - *

    Endpoint: GET /_plugins/_ppl/_grammar - * - *

    Returns grammar data for client-side parsing, autocomplete, syntax highlighting, etc: - * - *

      - *
    • Serialized ANTLR ATN data (lexer + parser) - *
    • Vocabulary (literal and symbolic names) - *
    • Rule names (parser and lexer) - *
    • Channel and mode names - *
    - * - */ +/** REST handler for {@code GET /_plugins/_ppl/_grammar}. */ @Log4j2 public class RestPPLGrammarAction extends BaseRestHandler { @@ -58,8 +44,6 @@ public List routes() { protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { - log.debug("Received PPL grammar request"); - return channel -> { try { GrammarBundle bundle = getOrBuildBundle(); @@ -67,9 +51,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli XContentBuilder builder = channel.newBuilder(); serializeBundle(builder, bundle); - BytesRestResponse response = new BytesRestResponse(RestStatus.OK, builder); - log.info("Returning PPL grammar (size: {} bytes)", response.content().length()); - channel.sendResponse(response); + channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder)); } catch (Exception e) { log.error("Error building or serializing PPL grammar", e); @@ -78,20 +60,13 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli }; } - /** - * Get cached bundle or build it if not yet initialized. - * - *

    Thread-safe lazy initialization with double-checked locking. - */ + // Thread-safe lazy initialization with double-checked locking. private GrammarBundle getOrBuildBundle() { - // First check without lock (common case: already initialized) if (cachedBundle != null) { return cachedBundle; } - - // Acquire lock for initialization synchronized (bundleLock) { - // Double-check after acquiring lock + // double-check after acquiring lock if (cachedBundle == null) { log.info("Building PPL grammar bundle (first request)..."); long startTime = System.currentTimeMillis(); @@ -106,12 +81,7 @@ private GrammarBundle getOrBuildBundle() { } } - /** - * Invalidate cached bundle (for testing or grammar updates). - * - *

    Note: This only affects the current node. In a multi-node cluster, each node maintains its - * own cache. - */ + /** Invalidate the cached bundle, forcing a rebuild on the next request. */ public void invalidateCache() { synchronized (bundleLock) { log.info("Invalidating cached PPL grammar bundle"); @@ -119,9 +89,6 @@ public void invalidateCache() { } } - /** - * Serialize {@link GrammarBundle} to JSON. - */ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) throws IOException { builder.startObject(); @@ -134,7 +101,6 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) // Lexer ATN & metadata if (bundle.getLexerSerializedATN() != null) { builder.field("lexerSerializedATN", bundle.getLexerSerializedATN()); - log.debug("Lexer ATN: {} elements", bundle.getLexerSerializedATN().length); } if (bundle.getLexerRuleNames() != null) { @@ -152,7 +118,6 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) // Parser ATN & metadata if (bundle.getParserSerializedATN() != null) { builder.field("parserSerializedATN", bundle.getParserSerializedATN()); - log.debug("Parser ATN: {} elements", bundle.getParserSerializedATN().length); } if (bundle.getParserRuleNames() != null) { diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index 6f7ed82dc75..031b32ddb74 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -23,17 +23,7 @@ import org.opensearch.test.rest.FakeRestRequest; import org.opensearch.transport.client.node.NodeClient; -/** - * Unit tests for RestPPLGrammarAction. - * - *

    Tests: - * - *

      - *
    • 200 OK response with grammar bundle - *
    • Grammar structure validation - *
    • Cache behavior - *
    - */ +/** Unit tests for {@link RestPPLGrammarAction}. */ public class RestPPLGrammarActionTest extends OpenSearchTestCase { private RestPPLGrammarAction action; @@ -83,8 +73,7 @@ public void testGetGrammar_ReturnsBundle() throws Exception { } @Test - public void testGetGrammar_ArtifactIsCached() throws Exception { - // Make two requests + public void testGetGrammar_BundleIsCached() throws Exception { FakeRestRequest request1 = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) .withMethod(RestRequest.Method.GET) @@ -96,7 +85,6 @@ public void testGetGrammar_ArtifactIsCached() throws Exception { action.prepareRequest(request1, client).accept(channel1); long elapsed1 = System.currentTimeMillis() - startTime1; - // Second request should be faster (cached) FakeRestRequest request2 = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) .withMethod(RestRequest.Method.GET) @@ -108,7 +96,6 @@ public void testGetGrammar_ArtifactIsCached() throws Exception { action.prepareRequest(request2, client).accept(channel2); long elapsed2 = System.currentTimeMillis() - startTime2; - // Second request should be faster (bundle cached) assertTrue( "Second request should be faster due to caching (elapsed1=" + elapsed1 @@ -120,7 +107,6 @@ public void testGetGrammar_ArtifactIsCached() throws Exception { @Test public void testInvalidateCache() throws Exception { - // Get grammar FakeRestRequest request1 = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) .withMethod(RestRequest.Method.GET) @@ -131,10 +117,8 @@ public void testInvalidateCache() throws Exception { action.prepareRequest(request1, client).accept(channel1); assertEquals(RestStatus.OK, channel1.getResponse().status()); - // Invalidate cache action.invalidateCache(); - // Get grammar again — should still return 200 OK with same content FakeRestRequest request2 = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) .withMethod(RestRequest.Method.GET) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index ce73f0d3b5f..a5ee5bd4e43 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -8,7 +8,6 @@ import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import lombok.extern.log4j.Log4j2; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.Vocabulary; @@ -18,33 +17,22 @@ import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; /** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */ -@Log4j2 public class PPLGrammarBundleBuilder { private static final String ANTLR_VERSION = "4.13.2"; private static final String BUNDLE_VERSION = "1.0"; - /** - * Build the PPL grammar bundle. - * - *

    Uses {@link ATNSerializer} to re-serialize the ATN into the int[] format expected by the - * antlr4ng runtime on the frontend. Do NOT use {@code lexer.getSerializedATN().chars().toArray()} - * — that yields raw UTF-16 char values which cause "state type 65535 is not valid" errors. - * - * @return {@link GrammarBundle} ready for JSON serialization - */ public GrammarBundle build() { - log.info("Building PPL grammar bundle..."); - OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString("")); CommonTokenStream tokens = new CommonTokenStream(lexer); OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens); + // ATNSerializer re-serializes the ATN into the int[] format expected by antlr4ng. + // Do not use lexer.getSerializedATN().chars().toArray() — that yields raw UTF-16 char values + // which cause "state type 65535 is not valid" errors in the frontend deserializer. int[] lexerATN = new ATNSerializer(lexer.getATN()).serialize().toArray(); int[] parserATN = new ATNSerializer(parser.getATN()).serialize().toArray(); - log.info("Lexer ATN: {} elements, Parser ATN: {} elements", lexerATN.length, parserATN.length); - Vocabulary vocabulary = parser.getVocabulary(); int maxTokenType = vocabulary.getMaxTokenType(); String[] literalNames = new String[maxTokenType + 1]; @@ -54,23 +42,19 @@ public GrammarBundle build() { symbolicNames[i] = vocabulary.getSymbolicName(i); } - GrammarBundle bundle = - GrammarBundle.builder() - .bundleVersion(BUNDLE_VERSION) - .grammarHash(computeGrammarHash(lexerATN, parserATN)) - .lexerSerializedATN(lexerATN) - .parserSerializedATN(parserATN) - .lexerRuleNames(lexer.getRuleNames()) - .parserRuleNames(parser.getRuleNames()) - .channelNames(lexer.getChannelNames()) - .modeNames(lexer.getModeNames()) - .startRuleIndex(0) - .literalNames(literalNames) - .symbolicNames(symbolicNames) - .build(); - - log.info("Built PPL grammar bundle: {} tokens", symbolicNames.length); - return bundle; + return GrammarBundle.builder() + .bundleVersion(BUNDLE_VERSION) + .grammarHash(computeGrammarHash(lexerATN, parserATN)) + .lexerSerializedATN(lexerATN) + .parserSerializedATN(parserATN) + .lexerRuleNames(lexer.getRuleNames()) + .parserRuleNames(parser.getRuleNames()) + .channelNames(lexer.getChannelNames()) + .modeNames(lexer.getModeNames()) + .startRuleIndex(0) + .literalNames(literalNames) + .symbolicNames(symbolicNames) + .build(); } private static String computeGrammarHash(int[] lexerATN, int[] parserATN) { From 937375b3b32108ec0f31b2882a273b7ba89f4b5d Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Thu, 19 Feb 2026 14:07:50 -0800 Subject: [PATCH 05/20] modify tests Signed-off-by: Eric Wei --- .../plugin/rest/RestPPLGrammarActionTest.java | 91 ++++++++-------- .../PPLGrammarBundleBuilderTest.java | 100 ++++++++++++++++++ 2 files changed, 149 insertions(+), 42 deletions(-) create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index 031b32ddb74..f58a8f0b5b4 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -10,28 +10,29 @@ import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; -import java.io.ByteArrayOutputStream; -import java.nio.charset.StandardCharsets; +import java.io.IOException; import org.junit.Before; import org.junit.Test; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.xcontent.XContentType; import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.MediaType; import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.rest.RestChannel; import org.opensearch.rest.RestRequest; import org.opensearch.rest.RestResponse; -import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.rest.FakeRestRequest; import org.opensearch.transport.client.node.NodeClient; /** Unit tests for {@link RestPPLGrammarAction}. */ -public class RestPPLGrammarActionTest extends OpenSearchTestCase { +public class RestPPLGrammarActionTest { private RestPPLGrammarAction action; private NodeClient client; @Before - public void setUp() throws Exception { - super.setUp(); + public void setUp() { action = new RestPPLGrammarAction(); client = mock(NodeClient.class); } @@ -57,19 +58,22 @@ public void testGetGrammar_ReturnsBundle() throws Exception { .build(); MockRestChannel channel = new MockRestChannel(request, true); - action.prepareRequest(request, client).accept(channel); + action.handleRequest(request, channel, client); RestResponse response = channel.getResponse(); assertNotNull("Response should not be null", response); assertEquals("Should return 200 OK", RestStatus.OK, response.status()); - String content = new String(response.content().array(), StandardCharsets.UTF_8); - assertTrue("Should contain bundleVersion", content.contains("\"bundleVersion\":")); - assertTrue("Should contain grammarHash", content.contains("\"grammarHash\":")); - assertTrue("Should contain lexerSerializedATN", content.contains("\"lexerSerializedATN\":")); - assertTrue("Should contain parserSerializedATN", content.contains("\"parserSerializedATN\":")); - assertTrue("Should contain literalNames", content.contains("\"literalNames\":")); - assertTrue("Should contain symbolicNames", content.contains("\"symbolicNames\":")); + String content = response.content().utf8ToString(); + assertTrue(content.contains("\"bundleVersion\":\"1.0\"")); + assertTrue(content.contains("\"grammarHash\":\"sha256:")); + assertTrue(content.contains("\"startRuleIndex\":0")); + assertTrue(content.contains("\"lexerSerializedATN\":")); + assertTrue(content.contains("\"parserSerializedATN\":")); + assertTrue(content.contains("\"lexerRuleNames\":")); + assertTrue(content.contains("\"parserRuleNames\":")); + assertTrue(content.contains("\"literalNames\":")); + assertTrue(content.contains("\"symbolicNames\":")); } @Test @@ -79,30 +83,20 @@ public void testGetGrammar_BundleIsCached() throws Exception { .withMethod(RestRequest.Method.GET) .withPath("/_plugins/_ppl/_grammar") .build(); - MockRestChannel channel1 = new MockRestChannel(request1, true); - long startTime1 = System.currentTimeMillis(); - action.prepareRequest(request1, client).accept(channel1); - long elapsed1 = System.currentTimeMillis() - startTime1; + action.handleRequest(request1, channel1, client); FakeRestRequest request2 = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) .withMethod(RestRequest.Method.GET) .withPath("/_plugins/_ppl/_grammar") .build(); - MockRestChannel channel2 = new MockRestChannel(request2, true); - long startTime2 = System.currentTimeMillis(); - action.prepareRequest(request2, client).accept(channel2); - long elapsed2 = System.currentTimeMillis() - startTime2; - - assertTrue( - "Second request should be faster due to caching (elapsed1=" - + elapsed1 - + "ms, elapsed2=" - + elapsed2 - + "ms)", - elapsed2 < elapsed1 || elapsed2 < 50); // Allow some variance + action.handleRequest(request2, channel2, client); + + String content1 = channel1.getResponse().content().utf8ToString(); + String content2 = channel2.getResponse().content().utf8ToString(); + assertEquals("Consecutive requests should return identical content", content1, content2); } @Test @@ -112,9 +106,8 @@ public void testInvalidateCache() throws Exception { .withMethod(RestRequest.Method.GET) .withPath("/_plugins/_ppl/_grammar") .build(); - MockRestChannel channel1 = new MockRestChannel(request1, true); - action.prepareRequest(request1, client).accept(channel1); + action.handleRequest(request1, channel1, client); assertEquals(RestStatus.OK, channel1.getResponse().status()); action.invalidateCache(); @@ -124,10 +117,13 @@ public void testInvalidateCache() throws Exception { .withMethod(RestRequest.Method.GET) .withPath("/_plugins/_ppl/_grammar") .build(); - MockRestChannel channel2 = new MockRestChannel(request2, true); - action.prepareRequest(request2, client).accept(channel2); + action.handleRequest(request2, channel2, client); assertEquals(RestStatus.OK, channel2.getResponse().status()); + + String content1 = channel1.getResponse().content().utf8ToString(); + String content2 = channel2.getResponse().content().utf8ToString(); + assertEquals("Grammar hash should be identical after cache invalidation and rebuild", content1, content2); } /** Mock RestChannel to capture responses */ @@ -161,23 +157,34 @@ public boolean detailedErrorsEnabled() { } @Override - public org.opensearch.core.xcontent.XContentBuilder newBuilder() { - return null; + public boolean detailedErrorStackTraceEnabled() { + return false; } @Override - public org.opensearch.core.xcontent.XContentBuilder newErrorBuilder() { - return null; + public XContentBuilder newBuilder() throws IOException { + return XContentBuilder.builder(XContentType.JSON.xContent()); } @Override - public org.opensearch.core.xcontent.XContentBuilder newBuilder( - org.opensearch.core.xcontent.MediaType mediaType, boolean useFiltering) { - return null; + public XContentBuilder newErrorBuilder() throws IOException { + return XContentBuilder.builder(XContentType.JSON.xContent()); + } + + @Override + public XContentBuilder newBuilder(MediaType mediaType, boolean useFiltering) throws IOException { + return XContentBuilder.builder(XContentType.JSON.xContent()); + } + + @Override + public XContentBuilder newBuilder( + MediaType requestContentType, MediaType responseContentType, boolean useFiltering) + throws IOException { + return XContentBuilder.builder(XContentType.JSON.xContent()); } @Override - public ByteArrayOutputStream bytesOutput() { + public BytesStreamOutput bytesOutput() { return null; } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java new file mode 100644 index 00000000000..c2501090ab0 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java @@ -0,0 +1,100 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.autocomplete; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.opensearch.sql.executor.autocomplete.GrammarBundle; + +public class PPLGrammarBundleBuilderTest { + + private static GrammarBundle bundle; + + @BeforeClass + public static void buildBundle() { + bundle = new PPLGrammarBundleBuilder().build(); + } + + @Test + public void bundleIsNotNull() { + assertNotNull(bundle); + } + + @Test + public void bundleVersionIsSet() { + assertEquals("1.0", bundle.getBundleVersion()); + } + + @Test + public void grammarHashHasExpectedFormat() { + String hash = bundle.getGrammarHash(); + assertNotNull(hash); + assertTrue("grammarHash should start with 'sha256:'", hash.startsWith("sha256:")); + assertTrue("grammarHash should be 71 chars (sha256: + 64 hex)", hash.length() == 71); + } + + @Test + public void startRuleIndexIsZero() { + assertEquals(0, bundle.getStartRuleIndex()); + } + + @Test + public void lexerATNIsNonEmpty() { + assertNotNull(bundle.getLexerSerializedATN()); + assertTrue(bundle.getLexerSerializedATN().length > 0); + } + + @Test + public void parserATNIsNonEmpty() { + assertNotNull(bundle.getParserSerializedATN()); + assertTrue(bundle.getParserSerializedATN().length > 0); + } + + @Test + public void lexerRuleNamesAreNonEmpty() { + assertNotNull(bundle.getLexerRuleNames()); + assertTrue(bundle.getLexerRuleNames().length > 0); + } + + @Test + public void parserRuleNamesAreNonEmpty() { + assertNotNull(bundle.getParserRuleNames()); + assertTrue(bundle.getParserRuleNames().length > 0); + } + + @Test + public void channelNamesAreNonEmpty() { + assertNotNull(bundle.getChannelNames()); + assertTrue(bundle.getChannelNames().length > 0); + } + + @Test + public void modeNamesAreNonEmpty() { + assertNotNull(bundle.getModeNames()); + assertTrue(bundle.getModeNames().length > 0); + } + + @Test + public void vocabularyIsNonEmpty() { + assertNotNull(bundle.getLiteralNames()); + assertNotNull(bundle.getSymbolicNames()); + assertTrue(bundle.getLiteralNames().length > 0); + assertTrue(bundle.getSymbolicNames().length > 0); + } + + @Test + public void buildIsDeterministic() { + GrammarBundle second = new PPLGrammarBundleBuilder().build(); + assertEquals( + "Two builds of the same grammar should produce the same hash", + bundle.getGrammarHash(), + second.getGrammarHash()); + } +} From c7d1b8978c231c6632cc7e29232de85a26924376 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Thu, 19 Feb 2026 14:42:20 -0800 Subject: [PATCH 06/20] a few mode cleanup Signed-off-by: Eric Wei --- .../sql/plugin/rest/RestPPLGrammarAction.java | 63 ++++++------------- .../plugin/rest/RestPPLGrammarActionTest.java | 48 ++++++++++++-- .../sql/ppl}/autocomplete/GrammarBundle.java | 25 ++++---- .../autocomplete/PPLGrammarBundleBuilder.java | 8 ++- .../PPLGrammarBundleBuilderTest.java | 3 +- 5 files changed, 81 insertions(+), 66 deletions(-) rename {core/src/main/java/org/opensearch/sql/executor => ppl/src/main/java/org/opensearch/sql/ppl}/autocomplete/GrammarBundle.java (59%) diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index 5cc8c72a631..466dfced2ce 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -7,6 +7,7 @@ import static org.opensearch.rest.RestRequest.Method.GET; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import java.io.IOException; import java.util.List; @@ -16,7 +17,7 @@ import org.opensearch.rest.BaseRestHandler; import org.opensearch.rest.BytesRestResponse; import org.opensearch.rest.RestRequest; -import org.opensearch.sql.executor.autocomplete.GrammarBundle; +import org.opensearch.sql.ppl.autocomplete.GrammarBundle; import org.opensearch.sql.ppl.autocomplete.PPLGrammarBundleBuilder; import org.opensearch.transport.client.node.NodeClient; @@ -47,12 +48,9 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli return channel -> { try { GrammarBundle bundle = getOrBuildBundle(); - XContentBuilder builder = channel.newBuilder(); serializeBundle(builder, bundle); - channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder)); - } catch (Exception e) { log.error("Error building or serializing PPL grammar", e); channel.sendResponse(new BytesRestResponse(channel, RestStatus.INTERNAL_SERVER_ERROR, e)); @@ -68,29 +66,27 @@ private GrammarBundle getOrBuildBundle() { synchronized (bundleLock) { // double-check after acquiring lock if (cachedBundle == null) { - log.info("Building PPL grammar bundle (first request)..."); - long startTime = System.currentTimeMillis(); - - PPLGrammarBundleBuilder builder = new PPLGrammarBundleBuilder(); - cachedBundle = builder.build(); - - long elapsed = System.currentTimeMillis() - startTime; - log.info("Built PPL grammar in {}ms (hash: {})", elapsed, cachedBundle.getGrammarHash()); + cachedBundle = buildBundle(); } return cachedBundle; } } + /** Constructs the grammar bundle. Override in tests to inject a custom or failing builder. */ + @VisibleForTesting + protected GrammarBundle buildBundle() { + return new PPLGrammarBundleBuilder().build(); + } + /** Invalidate the cached bundle, forcing a rebuild on the next request. */ + @VisibleForTesting public void invalidateCache() { synchronized (bundleLock) { - log.info("Invalidating cached PPL grammar bundle"); cachedBundle = null; } } - private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) - throws IOException { + private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) throws IOException { builder.startObject(); // Identity & versioning @@ -99,39 +95,18 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) builder.field("startRuleIndex", bundle.getStartRuleIndex()); // Lexer ATN & metadata - if (bundle.getLexerSerializedATN() != null) { - builder.field("lexerSerializedATN", bundle.getLexerSerializedATN()); - } - - if (bundle.getLexerRuleNames() != null) { - builder.field("lexerRuleNames", bundle.getLexerRuleNames()); - } - - if (bundle.getChannelNames() != null) { - builder.field("channelNames", bundle.getChannelNames()); - } - - if (bundle.getModeNames() != null) { - builder.field("modeNames", bundle.getModeNames()); - } + builder.field("lexerSerializedATN", bundle.getLexerSerializedATN()); + builder.field("lexerRuleNames", bundle.getLexerRuleNames()); + builder.field("channelNames", bundle.getChannelNames()); + builder.field("modeNames", bundle.getModeNames()); // Parser ATN & metadata - if (bundle.getParserSerializedATN() != null) { - builder.field("parserSerializedATN", bundle.getParserSerializedATN()); - } - - if (bundle.getParserRuleNames() != null) { - builder.field("parserRuleNames", bundle.getParserRuleNames()); - } + builder.field("parserSerializedATN", bundle.getParserSerializedATN()); + builder.field("parserRuleNames", bundle.getParserRuleNames()); // Vocabulary - if (bundle.getLiteralNames() != null) { - builder.field("literalNames", bundle.getLiteralNames()); - } - - if (bundle.getSymbolicNames() != null) { - builder.field("symbolicNames", bundle.getSymbolicNames()); - } + builder.field("literalNames", bundle.getLiteralNames()); + builder.field("symbolicNames", bundle.getSymbolicNames()); builder.endObject(); } diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index f58a8f0b5b4..cf68bdab221 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -11,6 +11,7 @@ import static org.mockito.Mockito.mock; import java.io.IOException; +import java.util.concurrent.atomic.AtomicInteger; import org.junit.Before; import org.junit.Test; import org.opensearch.common.io.stream.BytesStreamOutput; @@ -22,6 +23,7 @@ import org.opensearch.rest.RestChannel; import org.opensearch.rest.RestRequest; import org.opensearch.rest.RestResponse; +import org.opensearch.sql.ppl.autocomplete.GrammarBundle; import org.opensearch.test.rest.FakeRestRequest; import org.opensearch.transport.client.node.NodeClient; @@ -78,13 +80,23 @@ public void testGetGrammar_ReturnsBundle() throws Exception { @Test public void testGetGrammar_BundleIsCached() throws Exception { + AtomicInteger buildCount = new AtomicInteger(0); + RestPPLGrammarAction countingAction = + new RestPPLGrammarAction() { + @Override + protected GrammarBundle buildBundle() { + buildCount.incrementAndGet(); + return super.buildBundle(); + } + }; + FakeRestRequest request1 = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) .withMethod(RestRequest.Method.GET) .withPath("/_plugins/_ppl/_grammar") .build(); MockRestChannel channel1 = new MockRestChannel(request1, true); - action.handleRequest(request1, channel1, client); + countingAction.handleRequest(request1, channel1, client); FakeRestRequest request2 = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) @@ -92,11 +104,13 @@ public void testGetGrammar_BundleIsCached() throws Exception { .withPath("/_plugins/_ppl/_grammar") .build(); MockRestChannel channel2 = new MockRestChannel(request2, true); - action.handleRequest(request2, channel2, client); + countingAction.handleRequest(request2, channel2, client); - String content1 = channel1.getResponse().content().utf8ToString(); - String content2 = channel2.getResponse().content().utf8ToString(); - assertEquals("Consecutive requests should return identical content", content1, content2); + assertEquals("Bundle should be built exactly once", 1, buildCount.get()); + assertEquals( + "Consecutive requests should return identical content", + channel1.getResponse().content().utf8ToString(), + channel2.getResponse().content().utf8ToString()); } @Test @@ -123,7 +137,29 @@ public void testInvalidateCache() throws Exception { String content1 = channel1.getResponse().content().utf8ToString(); String content2 = channel2.getResponse().content().utf8ToString(); - assertEquals("Grammar hash should be identical after cache invalidation and rebuild", content1, content2); + assertEquals( + "Grammar hash should be identical after cache invalidation and rebuild", content1, content2); + } + + @Test + public void testGetGrammar_ErrorPath_Returns500() throws Exception { + RestPPLGrammarAction failingAction = + new RestPPLGrammarAction() { + @Override + protected GrammarBundle buildBundle() { + throw new RuntimeException("simulated build failure"); + } + }; + + FakeRestRequest request = + new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) + .withMethod(RestRequest.Method.GET) + .withPath("/_plugins/_ppl/_grammar") + .build(); + MockRestChannel channel = new MockRestChannel(request, true); + failingAction.handleRequest(request, channel, client); + + assertEquals(RestStatus.INTERNAL_SERVER_ERROR, channel.getResponse().status()); } /** Mock RestChannel to capture responses */ diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java similarity index 59% rename from core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java rename to ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java index 7b3720881aa..cd840d71e8c 100644 --- a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java @@ -3,46 +3,47 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.sql.executor.autocomplete; +package org.opensearch.sql.ppl.autocomplete; import lombok.Builder; import lombok.Data; +import lombok.NonNull; -/** Response payload for the {@code GET /_plugins/_ppl/_grammar} endpoint. */ +/** Serialized ANTLR grammar data served by {@code GET /_plugins/_ppl/_grammar}. */ @Data @Builder public class GrammarBundle { /** Bundle format version. */ - private String bundleVersion; + @NonNull private String bundleVersion; /** SHA-256 hash of the serialized ATN data. Clients may use this to detect grammar changes. */ - private String grammarHash; + @NonNull private String grammarHash; /** Serialized lexer ATN as int array (ATNSerializer output). */ - private int[] lexerSerializedATN; + @NonNull private int[] lexerSerializedATN; /** Lexer rule names. */ - private String[] lexerRuleNames; + @NonNull private String[] lexerRuleNames; /** Channel names (e.g. DEFAULT_TOKEN_CHANNEL, HIDDEN). */ - private String[] channelNames; + @NonNull private String[] channelNames; /** Mode names (e.g. DEFAULT_MODE). */ - private String[] modeNames; + @NonNull private String[] modeNames; /** Serialized parser ATN as int array (ATNSerializer output). */ - private int[] parserSerializedATN; + @NonNull private int[] parserSerializedATN; /** Parser rule names. */ - private String[] parserRuleNames; + @NonNull private String[] parserRuleNames; /** Start rule index (0 = root rule). */ private int startRuleIndex; /** Literal token names indexed by token type (e.g. "'search'", "'|'"). */ - private String[] literalNames; + @NonNull private String[] literalNames; /** Symbolic token names indexed by token type (e.g. "SEARCH", "PIPE"). */ - private String[] symbolicNames; + @NonNull private String[] symbolicNames; } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index a5ee5bd4e43..7e8edeb0d73 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -12,13 +12,13 @@ import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.Vocabulary; import org.antlr.v4.runtime.atn.ATNSerializer; -import org.opensearch.sql.executor.autocomplete.GrammarBundle; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; /** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */ public class PPLGrammarBundleBuilder { + // Keep in sync with the antlr4 version in ppl/build.gradle. private static final String ANTLR_VERSION = "4.13.2"; private static final String BUNDLE_VERSION = "1.0"; @@ -60,6 +60,8 @@ public GrammarBundle build() { private static String computeGrammarHash(int[] lexerATN, int[] parserATN) { try { MessageDigest digest = MessageDigest.getInstance("SHA-256"); + // ANTLR4 serialized ATN values are bounded to 16 bits (unicode char range), so hashing + // 2 bytes per element captures the full value without loss. for (int v : lexerATN) { digest.update((byte) (v >> 8)); digest.update((byte) v); @@ -70,7 +72,9 @@ private static String computeGrammarHash(int[] lexerATN, int[] parserATN) { } digest.update(ANTLR_VERSION.getBytes(StandardCharsets.UTF_8)); byte[] hash = digest.digest(); - StringBuilder sb = new StringBuilder("sha256:"); + // Output is always "sha256:" (7 chars) + 64 hex chars = 71 chars. + StringBuilder sb = new StringBuilder(71); + sb.append("sha256:"); for (byte b : hash) { sb.append(String.format("%02x", b & 0xFF)); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java index c2501090ab0..18e3cab01c2 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java @@ -11,7 +11,6 @@ import org.junit.BeforeClass; import org.junit.Test; -import org.opensearch.sql.executor.autocomplete.GrammarBundle; public class PPLGrammarBundleBuilderTest { @@ -37,7 +36,7 @@ public void grammarHashHasExpectedFormat() { String hash = bundle.getGrammarHash(); assertNotNull(hash); assertTrue("grammarHash should start with 'sha256:'", hash.startsWith("sha256:")); - assertTrue("grammarHash should be 71 chars (sha256: + 64 hex)", hash.length() == 71); + assertEquals("grammarHash should be 71 chars (sha256: + 64 hex)", 71, hash.length()); } @Test From 6ab349a267cb7d1ef9a45e844040bee1afa42373 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Fri, 20 Feb 2026 14:25:15 -0800 Subject: [PATCH 07/20] Read ANTLR version from runtime Signed-off-by: Eric Wei --- .../ppl/autocomplete/PPLGrammarBundleBuilder.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index 7e8edeb0d73..7efb4e1b207 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -17,19 +17,20 @@ /** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */ public class PPLGrammarBundleBuilder { - - // Keep in sync with the antlr4 version in ppl/build.gradle. - private static final String ANTLR_VERSION = "4.13.2"; + private static final String ANTLR_VERSION = getAntlrVersion(); private static final String BUNDLE_VERSION = "1.0"; + private static String getAntlrVersion() { + Package antlrPackage = org.antlr.v4.runtime.RuntimeMetaData.class.getPackage(); + String version = antlrPackage.getImplementationVersion(); + return version != null ? version : "unknown"; + } + public GrammarBundle build() { OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString("")); CommonTokenStream tokens = new CommonTokenStream(lexer); OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens); - // ATNSerializer re-serializes the ATN into the int[] format expected by antlr4ng. - // Do not use lexer.getSerializedATN().chars().toArray() — that yields raw UTF-16 char values - // which cause "state type 65535 is not valid" errors in the frontend deserializer. int[] lexerATN = new ATNSerializer(lexer.getATN()).serialize().toArray(); int[] parserATN = new ATNSerializer(parser.getATN()).serialize().toArray(); @@ -60,8 +61,6 @@ public GrammarBundle build() { private static String computeGrammarHash(int[] lexerATN, int[] parserATN) { try { MessageDigest digest = MessageDigest.getInstance("SHA-256"); - // ANTLR4 serialized ATN values are bounded to 16 bits (unicode char range), so hashing - // 2 bytes per element captures the full value without loss. for (int v : lexerATN) { digest.update((byte) (v >> 8)); digest.update((byte) v); From c6e4b55ba382d8b6a3a28f7185ba6d6a56e1017a Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Fri, 20 Feb 2026 14:45:20 -0800 Subject: [PATCH 08/20] add antlr version Signed-off-by: Eric Wei --- .../org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java | 1 + .../opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java | 1 + .../org/opensearch/sql/ppl/autocomplete/GrammarBundle.java | 3 +++ .../sql/ppl/autocomplete/PPLGrammarBundleBuilder.java | 1 + 4 files changed, 6 insertions(+) diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index 466dfced2ce..aecdab7b9f3 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -91,6 +91,7 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) thro // Identity & versioning builder.field("bundleVersion", bundle.getBundleVersion()); + builder.field("antlrVersion", bundle.getAntlrVersion()); builder.field("grammarHash", bundle.getGrammarHash()); builder.field("startRuleIndex", bundle.getStartRuleIndex()); diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index cf68bdab221..535f706f0b8 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -68,6 +68,7 @@ public void testGetGrammar_ReturnsBundle() throws Exception { String content = response.content().utf8ToString(); assertTrue(content.contains("\"bundleVersion\":\"1.0\"")); + assertTrue(content.contains("\"antlrVersion\":")); assertTrue(content.contains("\"grammarHash\":\"sha256:")); assertTrue(content.contains("\"startRuleIndex\":0")); assertTrue(content.contains("\"lexerSerializedATN\":")); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java index cd840d71e8c..7492ac6c17e 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java @@ -17,6 +17,9 @@ public class GrammarBundle { /** Bundle format version. */ @NonNull private String bundleVersion; + /** ANTLR runtime version used to generate the grammar. */ + @NonNull private String antlrVersion; + /** SHA-256 hash of the serialized ATN data. Clients may use this to detect grammar changes. */ @NonNull private String grammarHash; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index 7efb4e1b207..194f85e6a10 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -45,6 +45,7 @@ public GrammarBundle build() { return GrammarBundle.builder() .bundleVersion(BUNDLE_VERSION) + .antlrVersion(ANTLR_VERSION) .grammarHash(computeGrammarHash(lexerATN, parserATN)) .lexerSerializedATN(lexerATN) .parserSerializedATN(parserATN) From 67fd690c372c0cb3285a65492f7f2b22743e6786 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Fri, 20 Feb 2026 15:00:06 -0800 Subject: [PATCH 09/20] spotless fix Signed-off-by: Eric Wei --- .../sql/plugin/rest/RestPPLGrammarActionTest.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index 535f706f0b8..95825df39f9 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -139,7 +139,9 @@ public void testInvalidateCache() throws Exception { String content1 = channel1.getResponse().content().utf8ToString(); String content2 = channel2.getResponse().content().utf8ToString(); assertEquals( - "Grammar hash should be identical after cache invalidation and rebuild", content1, content2); + "Grammar hash should be identical after cache invalidation and rebuild", + content1, + content2); } @Test @@ -209,7 +211,8 @@ public XContentBuilder newErrorBuilder() throws IOException { } @Override - public XContentBuilder newBuilder(MediaType mediaType, boolean useFiltering) throws IOException { + public XContentBuilder newBuilder(MediaType mediaType, boolean useFiltering) + throws IOException { return XContentBuilder.builder(XContentType.JSON.xContent()); } From 19d2884247d843bced033d54f57693c1158f95f3 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Sun, 22 Feb 2026 22:16:45 -0800 Subject: [PATCH 10/20] Address PR review: fix hash truncation, antlrVersion API, immutability, and tests - Hash full 32-bit ints in grammarHash to avoid collisions with ANTLR 4.13.2 ATN serialization - Use RuntimeMetaData.getRuntimeVersion() instead of unreliable JAR manifest lookup - Make GrammarBundle immutable with @Value instead of @Data - Update THIRD-PARTY to reflect ANTLR 4.13.2 - Harden tests with JSON parsing and add antlrVersion assertion Signed-off-by: Eric Wei --- THIRD-PARTY | 6 ++-- .../plugin/rest/RestPPLGrammarActionTest.java | 36 +++++++++++++------ .../sql/ppl/autocomplete/GrammarBundle.java | 4 +-- .../autocomplete/PPLGrammarBundleBuilder.java | 28 +++++++-------- .../PPLGrammarBundleBuilderTest.java | 9 +++++ 5 files changed, 53 insertions(+), 30 deletions(-) diff --git a/THIRD-PARTY b/THIRD-PARTY index 8469c53d5e5..4fe2762c7e8 100644 --- a/THIRD-PARTY +++ b/THIRD-PARTY @@ -467,15 +467,15 @@ DAMAGE. ------ -** ANTLR; version 4.7.1 -- https://github.com/antlr/antlr4 +** ANTLR; version 4.13.2 -- https://github.com/antlr/antlr4 /* - * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Copyright (c) 2012-2024 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ [The "BSD 3-clause license"] -Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. +Copyright (c) 2012-2024 The ANTLR Project. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index 95825df39f9..a7aa620219f 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -12,6 +12,7 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicInteger; +import org.json.JSONObject; import org.junit.Before; import org.junit.Test; import org.opensearch.common.io.stream.BytesStreamOutput; @@ -67,16 +68,31 @@ public void testGetGrammar_ReturnsBundle() throws Exception { assertEquals("Should return 200 OK", RestStatus.OK, response.status()); String content = response.content().utf8ToString(); - assertTrue(content.contains("\"bundleVersion\":\"1.0\"")); - assertTrue(content.contains("\"antlrVersion\":")); - assertTrue(content.contains("\"grammarHash\":\"sha256:")); - assertTrue(content.contains("\"startRuleIndex\":0")); - assertTrue(content.contains("\"lexerSerializedATN\":")); - assertTrue(content.contains("\"parserSerializedATN\":")); - assertTrue(content.contains("\"lexerRuleNames\":")); - assertTrue(content.contains("\"parserRuleNames\":")); - assertTrue(content.contains("\"literalNames\":")); - assertTrue(content.contains("\"symbolicNames\":")); + JSONObject json = new JSONObject(content); + + // Identity & versioning + assertEquals("1.0", json.getString("bundleVersion")); + assertTrue( + "antlrVersion should be a version string", + json.getString("antlrVersion").matches("\\d+\\.\\d+.*")); + assertTrue( + "grammarHash should start with sha256:", + json.getString("grammarHash").startsWith("sha256:")); + assertEquals(0, json.getInt("startRuleIndex")); + + // Lexer ATN & metadata (non-empty arrays) + assertTrue(json.getJSONArray("lexerSerializedATN").length() > 0); + assertTrue(json.getJSONArray("lexerRuleNames").length() > 0); + assertTrue(json.getJSONArray("channelNames").length() > 0); + assertTrue(json.getJSONArray("modeNames").length() > 0); + + // Parser ATN & metadata (non-empty arrays) + assertTrue(json.getJSONArray("parserSerializedATN").length() > 0); + assertTrue(json.getJSONArray("parserRuleNames").length() > 0); + + // Vocabulary (non-empty arrays) + assertTrue(json.getJSONArray("literalNames").length() > 0); + assertTrue(json.getJSONArray("symbolicNames").length() > 0); } @Test diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java index 7492ac6c17e..0b733475994 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java @@ -6,11 +6,11 @@ package org.opensearch.sql.ppl.autocomplete; import lombok.Builder; -import lombok.Data; import lombok.NonNull; +import lombok.Value; /** Serialized ANTLR grammar data served by {@code GET /_plugins/_ppl/_grammar}. */ -@Data +@Value @Builder public class GrammarBundle { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index 194f85e6a10..c89ec7f1eed 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -17,15 +17,10 @@ /** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */ public class PPLGrammarBundleBuilder { - private static final String ANTLR_VERSION = getAntlrVersion(); + private static final String ANTLR_VERSION = + org.antlr.v4.runtime.RuntimeMetaData.getRuntimeVersion(); private static final String BUNDLE_VERSION = "1.0"; - private static String getAntlrVersion() { - Package antlrPackage = org.antlr.v4.runtime.RuntimeMetaData.class.getPackage(); - String version = antlrPackage.getImplementationVersion(); - return version != null ? version : "unknown"; - } - public GrammarBundle build() { OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString("")); CommonTokenStream tokens = new CommonTokenStream(lexer); @@ -62,14 +57,8 @@ public GrammarBundle build() { private static String computeGrammarHash(int[] lexerATN, int[] parserATN) { try { MessageDigest digest = MessageDigest.getInstance("SHA-256"); - for (int v : lexerATN) { - digest.update((byte) (v >> 8)); - digest.update((byte) v); - } - for (int v : parserATN) { - digest.update((byte) (v >> 8)); - digest.update((byte) v); - } + updateDigest(digest, lexerATN); + updateDigest(digest, parserATN); digest.update(ANTLR_VERSION.getBytes(StandardCharsets.UTF_8)); byte[] hash = digest.digest(); // Output is always "sha256:" (7 chars) + 64 hex chars = 71 chars. @@ -83,4 +72,13 @@ private static String computeGrammarHash(int[] lexerATN, int[] parserATN) { throw new IllegalStateException("SHA-256 not available", e); } } + + private static void updateDigest(MessageDigest digest, int[] data) { + for (int v : data) { + digest.update((byte) (v >>> 24)); + digest.update((byte) (v >>> 16)); + digest.update((byte) (v >>> 8)); + digest.update((byte) (v)); + } + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java index 18e3cab01c2..2083b63c212 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java @@ -31,6 +31,15 @@ public void bundleVersionIsSet() { assertEquals("1.0", bundle.getBundleVersion()); } + @Test + public void antlrVersionIsSet() { + String version = bundle.getAntlrVersion(); + assertNotNull("antlrVersion should not be null", version); + assertTrue( + "antlrVersion should look like a version string, got: " + version, + version.matches("\\d+\\.\\d+.*")); + } + @Test public void grammarHashHasExpectedFormat() { String hash = bundle.getGrammarHash(); From f4f5700b727a29d3a70e8f54ff99197f7822a49b Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Sun, 22 Feb 2026 22:45:47 -0800 Subject: [PATCH 11/20] Mark grammar API as experimental Signed-off-by: Eric Wei --- .../opensearch/sql/plugin/rest/RestPPLGrammarAction.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index aecdab7b9f3..873c261cc4e 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -12,6 +12,7 @@ import java.io.IOException; import java.util.List; import lombok.extern.log4j.Log4j2; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.core.rest.RestStatus; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.rest.BaseRestHandler; @@ -21,7 +22,12 @@ import org.opensearch.sql.ppl.autocomplete.PPLGrammarBundleBuilder; import org.opensearch.transport.client.node.NodeClient; -/** REST handler for {@code GET /_plugins/_ppl/_grammar}. */ +/* + * REST handler for {@code GET /_plugins/_ppl/_grammar}. + * + * @opensearch.experimental + */ +@ExperimentalApi @Log4j2 public class RestPPLGrammarAction extends BaseRestHandler { From 97079d5ba8b0991e48e7f70cee33a32299a47c62 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Sun, 22 Feb 2026 23:27:53 -0800 Subject: [PATCH 12/20] Address review: ATN v4 guard, startRuleIndex by name, test hardening - Assert ATN serialization version 4 for both lexer and parser to enforce antlr4ng compatibility contract - Resolve startRuleIndex by looking up "root" rule name instead of hardcoding 0 - Fix MockRestChannel.bytesOutput() to return real BytesStreamOutput - Document nullable elements in literalNames/symbolicNames Javadoc - Rename test methods to follow testXxx() convention per ppl/plugin modules Signed-off-by: Eric Wei --- .../plugin/rest/RestPPLGrammarActionTest.java | 2 +- .../sql/ppl/autocomplete/GrammarBundle.java | 10 ++++- .../autocomplete/PPLGrammarBundleBuilder.java | 8 +++- .../PPLGrammarBundleBuilderTest.java | 44 +++++++++++++------ 4 files changed, 47 insertions(+), 17 deletions(-) diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index a7aa620219f..ef10c7d6cbf 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -241,7 +241,7 @@ public XContentBuilder newBuilder( @Override public BytesStreamOutput bytesOutput() { - return null; + return new BytesStreamOutput(); } } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java index 0b733475994..2c508b3853e 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java @@ -44,9 +44,15 @@ public class GrammarBundle { /** Start rule index (0 = root rule). */ private int startRuleIndex; - /** Literal token names indexed by token type (e.g. "'search'", "'|'"). */ + /** + * Literal token names indexed by token type (e.g. "'search'", "'|'"). Elements may be null for + * tokens with no literal form; clients must handle sparse arrays. + */ @NonNull private String[] literalNames; - /** Symbolic token names indexed by token type (e.g. "SEARCH", "PIPE"). */ + /** + * Symbolic token names indexed by token type (e.g. "SEARCH", "PIPE"). Elements may be null for + * tokens with no symbolic name; clients must handle sparse arrays. + */ @NonNull private String[] symbolicNames; } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index c89ec7f1eed..2ec5eada4f6 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -8,6 +8,7 @@ import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.Arrays; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.Vocabulary; @@ -48,12 +49,17 @@ public GrammarBundle build() { .parserRuleNames(parser.getRuleNames()) .channelNames(lexer.getChannelNames()) .modeNames(lexer.getModeNames()) - .startRuleIndex(0) + .startRuleIndex(resolveStartRuleIndex(parser.getRuleNames())) .literalNames(literalNames) .symbolicNames(symbolicNames) .build(); } + private static int resolveStartRuleIndex(String[] ruleNames) { + int idx = Arrays.asList(ruleNames).indexOf("root"); + return Math.max(idx, 0); + } + private static String computeGrammarHash(int[] lexerATN, int[] parserATN) { try { MessageDigest digest = MessageDigest.getInstance("SHA-256"); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java index 2083b63c212..ddfc8876c62 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java @@ -14,6 +14,8 @@ public class PPLGrammarBundleBuilderTest { + private static final int EXPECTED_ATN_SERIALIZATION_VERSION = 4; + private static GrammarBundle bundle; @BeforeClass @@ -22,17 +24,17 @@ public static void buildBundle() { } @Test - public void bundleIsNotNull() { + public void testBuildBundleNotNull() { assertNotNull(bundle); } @Test - public void bundleVersionIsSet() { + public void testBuildBundleVersionIsSet() { assertEquals("1.0", bundle.getBundleVersion()); } @Test - public void antlrVersionIsSet() { + public void testBuildAntlrVersionIsSet() { String version = bundle.getAntlrVersion(); assertNotNull("antlrVersion should not be null", version); assertTrue( @@ -41,7 +43,7 @@ public void antlrVersionIsSet() { } @Test - public void grammarHashHasExpectedFormat() { + public void testBuildGrammarHashHasSha256Format() { String hash = bundle.getGrammarHash(); assertNotNull(hash); assertTrue("grammarHash should start with 'sha256:'", hash.startsWith("sha256:")); @@ -49,48 +51,64 @@ public void grammarHashHasExpectedFormat() { } @Test - public void startRuleIndexIsZero() { + public void testBuildStartRuleIndexIsZero() { assertEquals(0, bundle.getStartRuleIndex()); } @Test - public void lexerATNIsNonEmpty() { + public void testBuildLexerATNIsNonEmpty() { assertNotNull(bundle.getLexerSerializedATN()); assertTrue(bundle.getLexerSerializedATN().length > 0); } @Test - public void parserATNIsNonEmpty() { + public void testBuildLexerATNIsSerializationVersion4() { + assertEquals( + "Lexer ATN must be serialization version 4 for antlr4ng compatibility", + EXPECTED_ATN_SERIALIZATION_VERSION, + bundle.getLexerSerializedATN()[0]); + } + + @Test + public void testBuildParserATNIsNonEmpty() { assertNotNull(bundle.getParserSerializedATN()); assertTrue(bundle.getParserSerializedATN().length > 0); } @Test - public void lexerRuleNamesAreNonEmpty() { + public void testBuildParserATNIsSerializationVersion4() { + assertEquals( + "Parser ATN must be serialization version 4 for antlr4ng compatibility", + EXPECTED_ATN_SERIALIZATION_VERSION, + bundle.getParserSerializedATN()[0]); + } + + @Test + public void testBuildLexerRuleNamesAreNonEmpty() { assertNotNull(bundle.getLexerRuleNames()); assertTrue(bundle.getLexerRuleNames().length > 0); } @Test - public void parserRuleNamesAreNonEmpty() { + public void testBuildParserRuleNamesAreNonEmpty() { assertNotNull(bundle.getParserRuleNames()); assertTrue(bundle.getParserRuleNames().length > 0); } @Test - public void channelNamesAreNonEmpty() { + public void testBuildChannelNamesAreNonEmpty() { assertNotNull(bundle.getChannelNames()); assertTrue(bundle.getChannelNames().length > 0); } @Test - public void modeNamesAreNonEmpty() { + public void testBuildModeNamesAreNonEmpty() { assertNotNull(bundle.getModeNames()); assertTrue(bundle.getModeNames().length > 0); } @Test - public void vocabularyIsNonEmpty() { + public void testBuildVocabularyIsNonEmpty() { assertNotNull(bundle.getLiteralNames()); assertNotNull(bundle.getSymbolicNames()); assertTrue(bundle.getLiteralNames().length > 0); @@ -98,7 +116,7 @@ public void vocabularyIsNonEmpty() { } @Test - public void buildIsDeterministic() { + public void testBuildIsDeterministic() { GrammarBundle second = new PPLGrammarBundleBuilder().build(); assertEquals( "Two builds of the same grammar should produce the same hash", From 3c6748b54444c7f49624a402cd8db3630db90050 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Mon, 23 Feb 2026 11:55:51 -0800 Subject: [PATCH 13/20] Reduce invalidateCache() visibility from public to protected Consistent with buildBundle() which is also @VisibleForTesting protected. Signed-off-by: Eric Wei --- .../org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index 873c261cc4e..b879987fc63 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -86,7 +86,7 @@ protected GrammarBundle buildBundle() { /** Invalidate the cached bundle, forcing a rebuild on the next request. */ @VisibleForTesting - public void invalidateCache() { + protected void invalidateCache() { synchronized (bundleLock) { cachedBundle = null; } From 9487e700037fc0df4df7aabe540fd2d9f6b9e259 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Sun, 1 Mar 2026 17:23:40 -0800 Subject: [PATCH 14/20] add more necessary fields Signed-off-by: Eric Wei --- .../sql/plugin/rest/RestPPLGrammarAction.java | 5 + .../plugin/rest/RestPPLGrammarActionTest.java | 5 + .../sql/ppl/autocomplete/GrammarBundle.java | 23 +++ .../autocomplete/PPLGrammarBundleBuilder.java | 133 ++++++++++++++++++ .../PPLGrammarBundleBuilderTest.java | 36 +++++ 5 files changed, 202 insertions(+) diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index b879987fc63..ca3442d0272 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -115,6 +115,11 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) thro builder.field("literalNames", bundle.getLiteralNames()); builder.field("symbolicNames", bundle.getSymbolicNames()); + // Autocomplete configuration + builder.field("tokenDictionary", bundle.getTokenDictionary()); + builder.field("ignoredTokens", bundle.getIgnoredTokens()); + builder.field("rulesToVisit", bundle.getRulesToVisit()); + builder.endObject(); } } diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index ef10c7d6cbf..bc05d08192a 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -93,6 +93,11 @@ public void testGetGrammar_ReturnsBundle() throws Exception { // Vocabulary (non-empty arrays) assertTrue(json.getJSONArray("literalNames").length() > 0); assertTrue(json.getJSONArray("symbolicNames").length() > 0); + + // Autocomplete configuration + assertTrue(json.getJSONObject("tokenDictionary").length() > 0); + assertTrue(json.getJSONArray("ignoredTokens").length() > 0); + assertTrue(json.getJSONArray("rulesToVisit").length() > 0); } @Test diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java index 2c508b3853e..f0a8c8968cc 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java @@ -5,6 +5,7 @@ package org.opensearch.sql.ppl.autocomplete; +import java.util.Map; import lombok.Builder; import lombok.NonNull; import lombok.Value; @@ -55,4 +56,26 @@ public class GrammarBundle { * tokens with no symbolic name; clients must handle sparse arrays. */ @NonNull private String[] symbolicNames; + + /** + * Autocomplete token dictionary — maps semantic names used by the autocomplete enrichment logic + * (e.g. "SPACE", "PIPE", "SOURCE") to their token type IDs in this grammar. Clients use this + * to configure token-aware enrichment without hardcoding token IDs. + */ + @NonNull private Map tokenDictionary; + + /** + * Token type IDs that should be ignored by CodeCompletionCore during candidate collection. + * These are tokens like functions, operators, and internal tokens that should not appear + * as direct keyword suggestions (e.g. AVG, COUNT, PIPE operators). + */ + @NonNull private int[] ignoredTokens; + + /** + * Parser rule indices that CodeCompletionCore should treat as preferred rules. + * When these rules are candidate alternatives, CodeCompletionCore reports them as rule + * candidates instead of expanding into their child tokens. The autocomplete enrichment + * uses these to trigger semantic suggestions (e.g. suggest fields, suggest tables). + */ + @NonNull private int[] rulesToVisit; } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index 2ec5eada4f6..d030d20e24e 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -8,7 +8,13 @@ import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.Vocabulary; @@ -52,9 +58,136 @@ public GrammarBundle build() { .startRuleIndex(resolveStartRuleIndex(parser.getRuleNames())) .literalNames(literalNames) .symbolicNames(symbolicNames) + .tokenDictionary(buildTokenDictionary(vocabulary)) + .ignoredTokens(buildIgnoredTokens()) + .rulesToVisit(buildRulesToVisit(parser.getRuleNames())) .build(); } + /** + * Build the token dictionary — semantic name → token type ID mapping. Uses lexer constants + * since token type IDs are defined by the lexer. The frontend autocomplete enrichment uses + * these to identify tokens like SPACE, PIPE, SOURCE by name. + */ + private static Map buildTokenDictionary(Vocabulary vocabulary) { + Map dict = new LinkedHashMap<>(); + // SPACE token may not exist in this grammar (whitespace may be implicitly skipped). + // Resolve by searching symbolic names; use -1 if not found. + dict.put("WHITESPACE", OpenSearchPPLLexer.WHITESPACE); + dict.put("FROM", OpenSearchPPLLexer.FROM); + dict.put("OPENING_BRACKET", OpenSearchPPLLexer.LT_PRTHS); + dict.put("CLOSING_BRACKET", OpenSearchPPLLexer.RT_PRTHS); + dict.put("SEARCH", OpenSearchPPLLexer.SEARCH); + dict.put("SOURCE", OpenSearchPPLLexer.SOURCE); + dict.put("PIPE", OpenSearchPPLLexer.PIPE); + dict.put("ID", OpenSearchPPLLexer.ID); + dict.put("EQUAL", OpenSearchPPLLexer.EQUAL); + dict.put("IN", OpenSearchPPLLexer.IN); + dict.put("COMMA", OpenSearchPPLLexer.COMMA); + dict.put("BACKTICK_QUOTE", OpenSearchPPLLexer.BQUOTA_STRING); + dict.put("DOT", OpenSearchPPLLexer.DOT); + return dict; + } + + /** + * Build the list of token type IDs to ignore for autocomplete. Mirrors the frontend + * getIgnoredTokens() logic: explicitly ignore AS/IN, then ignore two contiguous token ranges + * minus operatorsToInclude. + * + *

    Range 1 (relevance/internal tokens): MATCH .. ERROR_RECOGNITION — covers relevance + * functions, search parameters, span literals, IDs, quoted strings, and error tokens. + * + *

    Range 2 (keywords/functions/operators): CASE .. CAST — covers CASE/ELSE, IN, EXISTS, + * NOT/OR/AND/XOR, TRUE/FALSE, REGEXP, datetime parts, data type keywords, punctuation, + * aggregate functions, math/text/date functions, and CAST. + * + *

    Tokens in {@code operatorsToInclude} are kept as suggestions even if they fall within + * an ignored range. + */ + private static int[] buildIgnoredTokens() { + // Verify range boundaries match expected token IDs. If the grammar changes and + // shifts token ordinals, these assertions surface the problem at build time. + assert OpenSearchPPLParser.MATCH == 427 + : "MATCH token ID shifted — update ignored range start"; + assert OpenSearchPPLParser.ERROR_RECOGNITION == 488 + : "ERROR_RECOGNITION token ID shifted — update ignored range end"; + assert OpenSearchPPLParser.CASE == 142 + : "CASE token ID shifted — update ignored range start"; + assert OpenSearchPPLParser.CAST == 387 + : "CAST token ID shifted — update ignored range end"; + + Set operatorsToInclude = new HashSet<>(Arrays.asList( + OpenSearchPPLParser.PIPE, OpenSearchPPLParser.EQUAL, OpenSearchPPLParser.COMMA, + OpenSearchPPLParser.NOT_EQUAL, OpenSearchPPLParser.LESS, OpenSearchPPLParser.NOT_LESS, + OpenSearchPPLParser.GREATER, OpenSearchPPLParser.NOT_GREATER, + OpenSearchPPLParser.OR, OpenSearchPPLParser.AND, + OpenSearchPPLParser.LT_PRTHS, OpenSearchPPLParser.RT_PRTHS, + OpenSearchPPLParser.SPAN, + OpenSearchPPLParser.MATCH, OpenSearchPPLParser.MATCH_PHRASE, + OpenSearchPPLParser.MATCH_BOOL_PREFIX, OpenSearchPPLParser.MATCH_PHRASE_PREFIX, + OpenSearchPPLParser.SQUOTA_STRING + )); + + List ignored = new ArrayList<>(); + ignored.add(OpenSearchPPLParser.AS); + ignored.add(OpenSearchPPLParser.IN); + + // Range 1: MATCH .. ERROR_RECOGNITION + for (int i = OpenSearchPPLParser.MATCH; i <= OpenSearchPPLParser.ERROR_RECOGNITION; i++) { + if (!operatorsToInclude.contains(i)) { + ignored.add(i); + } + } + + // Range 2: CASE .. CAST + for (int i = OpenSearchPPLParser.CASE; i <= OpenSearchPPLParser.CAST; i++) { + if (!operatorsToInclude.contains(i)) { + ignored.add(i); + } + } + + return ignored.stream().mapToInt(Integer::intValue).toArray(); + } + + /** + * Build the list of parser rule indices for CodeCompletionCore preferredRules. + * These rules trigger semantic suggestions (suggest fields, tables, functions, etc.). + * + * @throws IllegalStateException if any expected rule name is not found in the parser grammar + */ + private static int[] buildRulesToVisit(String[] ruleNames) { + List ruleNamesToVisit = Arrays.asList( + "statsFunctionName", "takeAggFunction", "integerLiteral", "decimalLiteral", + "keywordsCanBeId", "renameClasue", "qualifiedName", "tableQualifiedName", + "wcQualifiedName", "positionFunctionName", "searchableKeyWord", "stringLiteral", + "searchCommand", "searchComparisonOperator", "comparisonOperator", "sqlLikeJoinType" + ); + + List ruleNamesList = Arrays.asList(ruleNames); + int[] indices = new int[ruleNamesToVisit.size()]; + for (int i = 0; i < ruleNamesToVisit.size(); i++) { + String name = ruleNamesToVisit.get(i); + int idx = ruleNamesList.indexOf(name); + if (idx < 0) { + throw new IllegalStateException( + "Parser rule '" + name + "' not found in grammar — " + + "was it renamed or removed from OpenSearchPPLParser.g4?"); + } + indices[i] = idx; + } + return indices; + } + + /** Resolve a token type ID from the vocabulary by symbolic name. Returns -1 if not found. */ + private static int resolveTokenType(Vocabulary vocabulary, String name) { + for (int i = 0; i <= vocabulary.getMaxTokenType(); i++) { + if (name.equals(vocabulary.getSymbolicName(i))) { + return i; + } + } + return -1; + } + private static int resolveStartRuleIndex(String[] ruleNames) { int idx = Arrays.asList(ruleNames).indexOf("root"); return Math.max(idx, 0); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java index ddfc8876c62..49115db5143 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java @@ -9,8 +9,10 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import java.util.Map; import org.junit.BeforeClass; import org.junit.Test; +import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; public class PPLGrammarBundleBuilderTest { @@ -123,4 +125,38 @@ public void testBuildIsDeterministic() { bundle.getGrammarHash(), second.getGrammarHash()); } + + @Test + public void testTokenDictionaryContainsExpectedEntries() { + Map dict = bundle.getTokenDictionary(); + assertNotNull(dict); + assertEquals((Integer) OpenSearchPPLParser.PIPE, dict.get("PIPE")); + assertEquals((Integer) OpenSearchPPLParser.SOURCE, dict.get("SOURCE")); + assertEquals((Integer) OpenSearchPPLParser.FROM, dict.get("FROM")); + assertEquals((Integer) OpenSearchPPLParser.EQUAL, dict.get("EQUAL")); + assertEquals((Integer) OpenSearchPPLParser.ID, dict.get("ID")); + } + + @Test + public void testIgnoredTokensAreNonEmpty() { + assertNotNull(bundle.getIgnoredTokens()); + assertTrue("ignoredTokens should not be empty", bundle.getIgnoredTokens().length > 0); + } + + @Test + public void testRulesToVisitAreNonEmpty() { + assertNotNull(bundle.getRulesToVisit()); + assertTrue("rulesToVisit should not be empty", bundle.getRulesToVisit().length > 0); + } + + @Test + public void testIgnoredRangeBoundariesMatchGrammar() { + // These assertions mirror the runtime assertions in buildIgnoredTokens(). + // If the grammar changes token ordinals, both this test and the builder assertions + // will flag the issue. + assertEquals("MATCH token ID", 427, OpenSearchPPLParser.MATCH); + assertEquals("ERROR_RECOGNITION token ID", 488, OpenSearchPPLParser.ERROR_RECOGNITION); + assertEquals("CASE token ID", 142, OpenSearchPPLParser.CASE); + assertEquals("CAST token ID", 387, OpenSearchPPLParser.CAST); + } } From 2f42c0f44a2840b6b8d4091317b96f2811b08b5c Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Tue, 3 Mar 2026 06:29:55 -0800 Subject: [PATCH 15/20] adjusting ignore token set to be lexical/internal only Signed-off-by: Eric Wei --- .../autocomplete/PPLGrammarBundleBuilder.java | 83 ++++++-------- .../PPLGrammarBundleBuilderTest.java | 107 ++++++++++++++++-- 2 files changed, 132 insertions(+), 58 deletions(-) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index d030d20e24e..fc3cdb6b510 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -27,6 +27,22 @@ public class PPLGrammarBundleBuilder { private static final String ANTLR_VERSION = org.antlr.v4.runtime.RuntimeMetaData.getRuntimeVersion(); private static final String BUNDLE_VERSION = "1.0"; + private static final Set INTERNAL_NON_LITERAL_TOKENS = + new HashSet<>( + Arrays.asList( + "ID", + "NUMERIC_ID", + "ID_DATE_SUFFIX", + "CLUSTER", + "TIME_SNAP", + "SPANLENGTH", + "DECIMAL_SPANLENGTH", + "DQUOTA_STRING", + "SQUOTA_STRING", + "BQUOTA_STRING", + "LINE_COMMENT", + "BLOCK_COMMENT", + "ERROR_RECOGNITION")); public GrammarBundle build() { OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString("")); @@ -59,7 +75,7 @@ public GrammarBundle build() { .literalNames(literalNames) .symbolicNames(symbolicNames) .tokenDictionary(buildTokenDictionary(vocabulary)) - .ignoredTokens(buildIgnoredTokens()) + .ignoredTokens(buildIgnoredTokens(vocabulary)) .rulesToVisit(buildRulesToVisit(parser.getRuleNames())) .build(); } @@ -90,65 +106,32 @@ private static Map buildTokenDictionary(Vocabulary vocabulary) } /** - * Build the list of token type IDs to ignore for autocomplete. Mirrors the frontend - * getIgnoredTokens() logic: explicitly ignore AS/IN, then ignore two contiguous token ranges - * minus operatorsToInclude. + * Build token type IDs to ignore for autocomplete. * - *

    Range 1 (relevance/internal tokens): MATCH .. ERROR_RECOGNITION — covers relevance - * functions, search parameters, span literals, IDs, quoted strings, and error tokens. - * - *

    Range 2 (keywords/functions/operators): CASE .. CAST — covers CASE/ELSE, IN, EXISTS, - * NOT/OR/AND/XOR, TRUE/FALSE, REGEXP, datetime parts, data type keywords, punctuation, - * aggregate functions, math/text/date functions, and CAST. - * - *

    Tokens in {@code operatorsToInclude} are kept as suggestions even if they fall within - * an ignored range. + *

    Only lexical/internal tokens are ignored (identifiers, literals, quoted-string tokens, + * comments, and error token). User-facing commands/functions/operators are intentionally kept so + * completion dynamically reflects grammar changes. */ - private static int[] buildIgnoredTokens() { - // Verify range boundaries match expected token IDs. If the grammar changes and - // shifts token ordinals, these assertions surface the problem at build time. - assert OpenSearchPPLParser.MATCH == 427 - : "MATCH token ID shifted — update ignored range start"; - assert OpenSearchPPLParser.ERROR_RECOGNITION == 488 - : "ERROR_RECOGNITION token ID shifted — update ignored range end"; - assert OpenSearchPPLParser.CASE == 142 - : "CASE token ID shifted — update ignored range start"; - assert OpenSearchPPLParser.CAST == 387 - : "CAST token ID shifted — update ignored range end"; - - Set operatorsToInclude = new HashSet<>(Arrays.asList( - OpenSearchPPLParser.PIPE, OpenSearchPPLParser.EQUAL, OpenSearchPPLParser.COMMA, - OpenSearchPPLParser.NOT_EQUAL, OpenSearchPPLParser.LESS, OpenSearchPPLParser.NOT_LESS, - OpenSearchPPLParser.GREATER, OpenSearchPPLParser.NOT_GREATER, - OpenSearchPPLParser.OR, OpenSearchPPLParser.AND, - OpenSearchPPLParser.LT_PRTHS, OpenSearchPPLParser.RT_PRTHS, - OpenSearchPPLParser.SPAN, - OpenSearchPPLParser.MATCH, OpenSearchPPLParser.MATCH_PHRASE, - OpenSearchPPLParser.MATCH_BOOL_PREFIX, OpenSearchPPLParser.MATCH_PHRASE_PREFIX, - OpenSearchPPLParser.SQUOTA_STRING - )); - + private static int[] buildIgnoredTokens(Vocabulary vocabulary) { List ignored = new ArrayList<>(); - ignored.add(OpenSearchPPLParser.AS); - ignored.add(OpenSearchPPLParser.IN); - // Range 1: MATCH .. ERROR_RECOGNITION - for (int i = OpenSearchPPLParser.MATCH; i <= OpenSearchPPLParser.ERROR_RECOGNITION; i++) { - if (!operatorsToInclude.contains(i)) { - ignored.add(i); - } - } - - // Range 2: CASE .. CAST - for (int i = OpenSearchPPLParser.CASE; i <= OpenSearchPPLParser.CAST; i++) { - if (!operatorsToInclude.contains(i)) { - ignored.add(i); + for (int tokenType = 0; tokenType <= vocabulary.getMaxTokenType(); tokenType++) { + String symbolicName = vocabulary.getSymbolicName(tokenType); + if (isLexicalInternalToken(symbolicName)) { + ignored.add(tokenType); } } return ignored.stream().mapToInt(Integer::intValue).toArray(); } + private static boolean isLexicalInternalToken(String symbolicName) { + if (symbolicName == null) { + return false; + } + return symbolicName.endsWith("_LITERAL") || INTERNAL_NON_LITERAL_TOKENS.contains(symbolicName); + } + /** * Build the list of parser rule indices for CodeCompletionCore preferredRules. * These rules trigger semantic suggestions (suggest fields, tables, functions, etc.). diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java index 49115db5143..fb5dd4dac9b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java @@ -6,10 +6,14 @@ package org.opensearch.sql.ppl.autocomplete; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import java.util.Arrays; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import org.junit.BeforeClass; import org.junit.Test; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; @@ -17,6 +21,22 @@ public class PPLGrammarBundleBuilderTest { private static final int EXPECTED_ATN_SERIALIZATION_VERSION = 4; + private static final Set EXPECTED_IGNORED_NON_LITERAL_SYMBOLS = + new HashSet<>( + Arrays.asList( + "ID", + "NUMERIC_ID", + "ID_DATE_SUFFIX", + "CLUSTER", + "TIME_SNAP", + "SPANLENGTH", + "DECIMAL_SPANLENGTH", + "DQUOTA_STRING", + "SQUOTA_STRING", + "BQUOTA_STRING", + "LINE_COMMENT", + "BLOCK_COMMENT", + "ERROR_RECOGNITION")); private static GrammarBundle bundle; @@ -150,13 +170,84 @@ public void testRulesToVisitAreNonEmpty() { } @Test - public void testIgnoredRangeBoundariesMatchGrammar() { - // These assertions mirror the runtime assertions in buildIgnoredTokens(). - // If the grammar changes token ordinals, both this test and the builder assertions - // will flag the issue. - assertEquals("MATCH token ID", 427, OpenSearchPPLParser.MATCH); - assertEquals("ERROR_RECOGNITION token ID", 488, OpenSearchPPLParser.ERROR_RECOGNITION); - assertEquals("CASE token ID", 142, OpenSearchPPLParser.CASE); - assertEquals("CAST token ID", 387, OpenSearchPPLParser.CAST); + public void testIgnoredTokensContainOnlyLexicalInternalTokens() { + Set ignored = ignoredTokenSet(); + for (Integer tokenType : ignored) { + String symbol = bundle.getSymbolicNames()[tokenType]; + assertTrue( + "ignoredTokens should contain only lexical/internal tokens, got: " + + symbol + + " (" + + tokenType + + ")", + symbol != null + && (symbol.endsWith("_LITERAL") + || EXPECTED_IGNORED_NON_LITERAL_SYMBOLS.contains(symbol))); + } + } + + @Test + public void testCommandAndKeywordTokensAreNotIgnored() { + Set ignored = ignoredTokenSet(); + assertFalse("LOOKUP should not be ignored", ignored.contains(OpenSearchPPLParser.LOOKUP)); + assertFalse("REPLACE should not be ignored", ignored.contains(OpenSearchPPLParser.REPLACE)); + assertFalse("REVERSE should not be ignored", ignored.contains(OpenSearchPPLParser.REVERSE)); + assertFalse("MVCOMBINE should not be ignored", ignored.contains(OpenSearchPPLParser.MVCOMBINE)); + assertFalse("MVEXPAND should not be ignored", ignored.contains(OpenSearchPPLParser.MVEXPAND)); + assertFalse("LEFT should not be ignored", ignored.contains(OpenSearchPPLParser.LEFT)); + assertFalse("RIGHT should not be ignored", ignored.contains(OpenSearchPPLParser.RIGHT)); + assertFalse("AS should not be ignored", ignored.contains(OpenSearchPPLParser.AS)); + assertFalse("IN should not be ignored", ignored.contains(OpenSearchPPLParser.IN)); + } + + @Test + public void testExpressionFunctionTokensAreNotIgnored() { + Set ignored = ignoredTokenSet(); + assertFalse("MVAPPEND should not be ignored", ignored.contains(OpenSearchPPLParser.MVAPPEND)); + assertFalse("MVJOIN should not be ignored", ignored.contains(OpenSearchPPLParser.MVJOIN)); + assertFalse("MVINDEX should not be ignored", ignored.contains(OpenSearchPPLParser.MVINDEX)); + } + + @Test + public void testNewerGrammarKeywordsAreNotIgnoredWhenPresent() { + // These tokens exist in newer grammar variants (for example graph lookup support). + // Keep this test tolerant so it works across branches with different grammar revisions. + assertTokenNotIgnoredIfPresent("GRAPHLOOKUP"); + assertTokenNotIgnoredIfPresent("START_FIELD"); + assertTokenNotIgnoredIfPresent("FROM_FIELD"); + assertTokenNotIgnoredIfPresent("TO_FIELD"); + assertTokenNotIgnoredIfPresent("MAX_DEPTH"); + assertTokenNotIgnoredIfPresent("DEPTH_FIELD"); + assertTokenNotIgnoredIfPresent("DIRECTION"); + assertTokenNotIgnoredIfPresent("UNI"); + assertTokenNotIgnoredIfPresent("BI"); + assertTokenNotIgnoredIfPresent("SUPPORT_ARRAY"); + assertTokenNotIgnoredIfPresent("BATCH_MODE"); + assertTokenNotIgnoredIfPresent("USE_PIT"); + } + + private static Set ignoredTokenSet() { + Set ignored = new HashSet<>(); + for (int tokenType : bundle.getIgnoredTokens()) { + ignored.add(tokenType); + } + return ignored; + } + + private static void assertTokenNotIgnoredIfPresent(String symbolicTokenName) { + int tokenType = tokenTypeBySymbolicName(symbolicTokenName); + if (tokenType >= 0) { + assertFalse(symbolicTokenName + " should not be ignored", ignoredTokenSet().contains(tokenType)); + } + } + + private static int tokenTypeBySymbolicName(String symbolicTokenName) { + String[] symbols = bundle.getSymbolicNames(); + for (int i = 0; i < symbols.length; i++) { + if (symbolicTokenName.equals(symbols[i])) { + return i; + } + } + return -1; } } From 6dae31b50059552e0e725479e43a0f9056d1f151 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Tue, 3 Mar 2026 07:14:37 -0800 Subject: [PATCH 16/20] addressed fix-now comments Signed-off-by: Eric Wei --- docs/user/ppl/interfaces/endpoint.md | 32 ++++++ .../rest-api-spec/api/ppl.grammar.json | 18 ++++ .../rest-api-spec/test/api/ppl.grammar.yml | 18 ++++ .../sql/plugin/rest/RestPPLGrammarAction.java | 27 ++--- .../plugin/rest/RestPPLGrammarActionTest.java | 46 +++++++-- .../sql/ppl/autocomplete/GrammarBundle.java | 98 +++++++++++++++++-- .../autocomplete/PPLGrammarBundleBuilder.java | 72 +++++++++++--- .../PPLGrammarBundleBuilderTest.java | 38 ++++++- 8 files changed, 296 insertions(+), 53 deletions(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/api/ppl.grammar.json create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/api/ppl.grammar.yml diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index d5958ba3250..b52412880db 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -201,3 +201,35 @@ Expected output (trimmed): - Plan node names use Calcite physical operator names (for example, `EnumerableCalc` or `CalciteEnumerableIndexScan`). - Plan `time_ms` is inclusive of child operators and represents wall-clock time; overlapping work can make summed plan times exceed `summary.total_time_ms`. - Scan nodes reflect operator wall-clock time; background prefetch can make scan time smaller than total request latency. + +## Grammar (Experimental) + +### Description + +You can send an HTTP GET request to endpoint **/_plugins/_ppl/_grammar** to fetch serialized PPL grammar metadata used by autocomplete clients. + +### Example + +```bash +curl -sS -X GET localhost:9200/_plugins/_ppl/_grammar +``` + +Expected output (trimmed): + +```json +{ + "bundleVersion": "1.0", + "antlrVersion": "4.13.2", + "grammarHash": "sha256:...", + "startRuleIndex": 0, + "lexerSerializedATN": [4, ...], + "parserSerializedATN": [4, ...], + "lexerRuleNames": ["SEARCH", "..."], + "parserRuleNames": ["root", "..."], + "literalNames": [null, "'SEARCH'", "..."], + "symbolicNames": [null, "SEARCH", "..."], + "tokenDictionary": {"PIPE": 196, "...": 0}, + "ignoredTokens": [472, 473, "..."], + "rulesToVisit": [200, 201, "..."] +} +``` diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/api/ppl.grammar.json b/integ-test/src/yamlRestTest/resources/rest-api-spec/api/ppl.grammar.json new file mode 100644 index 00000000000..d2be3e0189d --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/api/ppl.grammar.json @@ -0,0 +1,18 @@ +{ + "ppl.grammar": { + "documentation": { + "url": "https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/interfaces/endpoint.md", + "description": "PPL Grammar Endpoint for Autocomplete" + }, + "stability": "experimental", + "url": { + "paths": [ + { + "path": "/_plugins/_ppl/_grammar", + "methods": ["GET"] + } + ] + }, + "params": {} + } +} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/api/ppl.grammar.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/api/ppl.grammar.yml new file mode 100644 index 00000000000..bb1ade36bda --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/api/ppl.grammar.yml @@ -0,0 +1,18 @@ +--- +"PPL grammar endpoint returns expected response shape": + - do: + ppl.grammar: {} + - is_true: bundleVersion + - is_true: antlrVersion + - is_true: grammarHash + - match: {startRuleIndex: 0} + - gt: {lexerSerializedATN.0: 0} + - is_true: lexerRuleNames.0 + - is_true: channelNames.0 + - is_true: modeNames.0 + - gt: {parserSerializedATN.0: 0} + - is_true: parserRuleNames.0 + - is_true: symbolicNames.1 + - is_true: tokenDictionary.PIPE + - gt: {ignoredTokens.0: 0} + - gt: {rulesToVisit.0: 0} diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index ca3442d0272..ff29dfebeb6 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -34,8 +34,7 @@ public class RestPPLGrammarAction extends BaseRestHandler { private static final String ENDPOINT_PATH = "/_plugins/_ppl/_grammar"; // Lazy-initialized singleton bundle (built once per JVM lifecycle) - private volatile GrammarBundle cachedBundle; - private final Object bundleLock = new Object(); + private GrammarBundle cachedBundle; @Override public String getName() { @@ -59,23 +58,17 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder)); } catch (Exception e) { log.error("Error building or serializing PPL grammar", e); - channel.sendResponse(new BytesRestResponse(channel, RestStatus.INTERNAL_SERVER_ERROR, e)); + channel.sendResponse(new BytesRestResponse(channel, e)); } }; } - // Thread-safe lazy initialization with double-checked locking. - private GrammarBundle getOrBuildBundle() { - if (cachedBundle != null) { - return cachedBundle; - } - synchronized (bundleLock) { - // double-check after acquiring lock - if (cachedBundle == null) { - cachedBundle = buildBundle(); - } - return cachedBundle; + // Thread-safe lazy initialization. + private synchronized GrammarBundle getOrBuildBundle() { + if (cachedBundle == null) { + cachedBundle = buildBundle(); } + return cachedBundle; } /** Constructs the grammar bundle. Override in tests to inject a custom or failing builder. */ @@ -86,10 +79,8 @@ protected GrammarBundle buildBundle() { /** Invalidate the cached bundle, forcing a rebuild on the next request. */ @VisibleForTesting - protected void invalidateCache() { - synchronized (bundleLock) { - cachedBundle = null; - } + protected synchronized void invalidateCache() { + cachedBundle = null; } private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) throws IOException { diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index bc05d08192a..575c1db6b92 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -137,16 +137,27 @@ protected GrammarBundle buildBundle() { @Test public void testInvalidateCache() throws Exception { + AtomicInteger buildCount = new AtomicInteger(0); + RestPPLGrammarAction countingAction = + new RestPPLGrammarAction() { + @Override + protected GrammarBundle buildBundle() { + buildCount.incrementAndGet(); + return super.buildBundle(); + } + }; + FakeRestRequest request1 = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) .withMethod(RestRequest.Method.GET) .withPath("/_plugins/_ppl/_grammar") .build(); MockRestChannel channel1 = new MockRestChannel(request1, true); - action.handleRequest(request1, channel1, client); + countingAction.handleRequest(request1, channel1, client); assertEquals(RestStatus.OK, channel1.getResponse().status()); + assertEquals("Bundle should be built on first request", 1, buildCount.get()); - action.invalidateCache(); + countingAction.invalidateCache(); FakeRestRequest request2 = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) @@ -154,15 +165,9 @@ public void testInvalidateCache() throws Exception { .withPath("/_plugins/_ppl/_grammar") .build(); MockRestChannel channel2 = new MockRestChannel(request2, true); - action.handleRequest(request2, channel2, client); + countingAction.handleRequest(request2, channel2, client); assertEquals(RestStatus.OK, channel2.getResponse().status()); - - String content1 = channel1.getResponse().content().utf8ToString(); - String content2 = channel2.getResponse().content().utf8ToString(); - assertEquals( - "Grammar hash should be identical after cache invalidation and rebuild", - content1, - content2); + assertEquals("Bundle should be rebuilt after cache invalidation", 2, buildCount.get()); } @Test @@ -186,6 +191,27 @@ protected GrammarBundle buildBundle() { assertEquals(RestStatus.INTERNAL_SERVER_ERROR, channel.getResponse().status()); } + @Test + public void testGetGrammar_NullBundle_Returns500() throws Exception { + RestPPLGrammarAction nullBundleAction = + new RestPPLGrammarAction() { + @Override + protected GrammarBundle buildBundle() { + return null; + } + }; + + FakeRestRequest request = + new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) + .withMethod(RestRequest.Method.GET) + .withPath("/_plugins/_ppl/_grammar") + .build(); + MockRestChannel channel = new MockRestChannel(request, true); + nullBundleAction.handleRequest(request, channel, client); + + assertEquals(RestStatus.INTERNAL_SERVER_ERROR, channel.getResponse().status()); + } + /** Mock RestChannel to capture responses */ private static class MockRestChannel implements RestChannel { private final RestRequest request; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java index f0a8c8968cc..552c3f39167 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java @@ -5,6 +5,9 @@ package org.opensearch.sql.ppl.autocomplete; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; import java.util.Map; import lombok.Builder; import lombok.NonNull; @@ -21,7 +24,10 @@ public class GrammarBundle { /** ANTLR runtime version used to generate the grammar. */ @NonNull private String antlrVersion; - /** SHA-256 hash of the serialized ATN data. Clients may use this to detect grammar changes. */ + /** + * SHA-256 hash of grammar metadata used by autocomplete (ATN, rule names, vocabulary, ANTLR + * version). Clients may use this to detect grammar changes. + */ @NonNull private String grammarHash; /** Serialized lexer ATN as int array (ATNSerializer output). */ @@ -59,23 +65,95 @@ public class GrammarBundle { /** * Autocomplete token dictionary — maps semantic names used by the autocomplete enrichment logic - * (e.g. "SPACE", "PIPE", "SOURCE") to their token type IDs in this grammar. Clients use this - * to configure token-aware enrichment without hardcoding token IDs. + * (e.g. "SPACE", "PIPE", "SOURCE") to their token type IDs in this grammar. Clients use this to + * configure token-aware enrichment without hardcoding token IDs. */ @NonNull private Map tokenDictionary; /** - * Token type IDs that should be ignored by CodeCompletionCore during candidate collection. - * These are tokens like functions, operators, and internal tokens that should not appear - * as direct keyword suggestions (e.g. AVG, COUNT, PIPE operators). + * Token type IDs that should be ignored by CodeCompletionCore during candidate collection. These + * are lexical/internal tokens that should not appear as direct keyword suggestions. */ @NonNull private int[] ignoredTokens; /** - * Parser rule indices that CodeCompletionCore should treat as preferred rules. - * When these rules are candidate alternatives, CodeCompletionCore reports them as rule - * candidates instead of expanding into their child tokens. The autocomplete enrichment - * uses these to trigger semantic suggestions (e.g. suggest fields, suggest tables). + * Parser rule indices that CodeCompletionCore should treat as preferred rules. When these rules + * are candidate alternatives, CodeCompletionCore reports them as rule candidates instead of + * expanding into their child tokens. The autocomplete enrichment uses these to trigger semantic + * suggestions (e.g. suggest fields, suggest tables). */ @NonNull private int[] rulesToVisit; + + public int[] getLexerSerializedATN() { + return copy(lexerSerializedATN); + } + + public String[] getLexerRuleNames() { + return copy(lexerRuleNames); + } + + public String[] getChannelNames() { + return copy(channelNames); + } + + public String[] getModeNames() { + return copy(modeNames); + } + + public int[] getParserSerializedATN() { + return copy(parserSerializedATN); + } + + public String[] getParserRuleNames() { + return copy(parserRuleNames); + } + + public String[] getLiteralNames() { + return copy(literalNames); + } + + public String[] getSymbolicNames() { + return copy(symbolicNames); + } + + public Map getTokenDictionary() { + return Collections.unmodifiableMap(new LinkedHashMap<>(tokenDictionary)); + } + + public int[] getIgnoredTokens() { + return copy(ignoredTokens); + } + + public int[] getRulesToVisit() { + return copy(rulesToVisit); + } + + public static class GrammarBundleBuilder { + public GrammarBundle build() { + return new GrammarBundle( + bundleVersion, + antlrVersion, + grammarHash, + copy(lexerSerializedATN), + copy(lexerRuleNames), + copy(channelNames), + copy(modeNames), + copy(parserSerializedATN), + copy(parserRuleNames), + startRuleIndex, + copy(literalNames), + copy(symbolicNames), + Collections.unmodifiableMap(new LinkedHashMap<>(tokenDictionary)), + copy(ignoredTokens), + copy(rulesToVisit)); + } + } + + private static int[] copy(int[] values) { + return Arrays.copyOf(values, values.length); + } + + private static String[] copy(String[] values) { + return Arrays.copyOf(values, values.length); + } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index fc3cdb6b510..8c5f7db70ec 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -64,7 +64,14 @@ public GrammarBundle build() { return GrammarBundle.builder() .bundleVersion(BUNDLE_VERSION) .antlrVersion(ANTLR_VERSION) - .grammarHash(computeGrammarHash(lexerATN, parserATN)) + .grammarHash( + computeGrammarHash( + lexerATN, + parserATN, + lexer.getRuleNames(), + parser.getRuleNames(), + literalNames, + symbolicNames)) .lexerSerializedATN(lexerATN) .parserSerializedATN(parserATN) .lexerRuleNames(lexer.getRuleNames()) @@ -81,9 +88,9 @@ public GrammarBundle build() { } /** - * Build the token dictionary — semantic name → token type ID mapping. Uses lexer constants - * since token type IDs are defined by the lexer. The frontend autocomplete enrichment uses - * these to identify tokens like SPACE, PIPE, SOURCE by name. + * Build the token dictionary — semantic name → token type ID mapping. Uses lexer constants since + * token type IDs are defined by the lexer. The frontend autocomplete enrichment uses these to + * identify tokens like SPACE, PIPE, SOURCE by name. */ private static Map buildTokenDictionary(Vocabulary vocabulary) { Map dict = new LinkedHashMap<>(); @@ -133,18 +140,30 @@ private static boolean isLexicalInternalToken(String symbolicName) { } /** - * Build the list of parser rule indices for CodeCompletionCore preferredRules. - * These rules trigger semantic suggestions (suggest fields, tables, functions, etc.). + * Build the list of parser rule indices for CodeCompletionCore preferredRules. These rules + * trigger semantic suggestions (suggest fields, tables, functions, etc.). * * @throws IllegalStateException if any expected rule name is not found in the parser grammar */ private static int[] buildRulesToVisit(String[] ruleNames) { - List ruleNamesToVisit = Arrays.asList( - "statsFunctionName", "takeAggFunction", "integerLiteral", "decimalLiteral", - "keywordsCanBeId", "renameClasue", "qualifiedName", "tableQualifiedName", - "wcQualifiedName", "positionFunctionName", "searchableKeyWord", "stringLiteral", - "searchCommand", "searchComparisonOperator", "comparisonOperator", "sqlLikeJoinType" - ); + List ruleNamesToVisit = + Arrays.asList( + "statsFunctionName", + "takeAggFunction", + "integerLiteral", + "decimalLiteral", + "keywordsCanBeId", + "renameClasue", + "qualifiedName", + "tableQualifiedName", + "wcQualifiedName", + "positionFunctionName", + "searchableKeyWord", + "stringLiteral", + "searchCommand", + "searchComparisonOperator", + "comparisonOperator", + "sqlLikeJoinType"); List ruleNamesList = Arrays.asList(ruleNames); int[] indices = new int[ruleNamesToVisit.size()]; @@ -153,7 +172,9 @@ private static int[] buildRulesToVisit(String[] ruleNames) { int idx = ruleNamesList.indexOf(name); if (idx < 0) { throw new IllegalStateException( - "Parser rule '" + name + "' not found in grammar — " + "Parser rule '" + + name + + "' not found in grammar — " + "was it renamed or removed from OpenSearchPPLParser.g4?"); } indices[i] = idx; @@ -176,11 +197,21 @@ private static int resolveStartRuleIndex(String[] ruleNames) { return Math.max(idx, 0); } - private static String computeGrammarHash(int[] lexerATN, int[] parserATN) { + private static String computeGrammarHash( + int[] lexerATN, + int[] parserATN, + String[] lexerRuleNames, + String[] parserRuleNames, + String[] literalNames, + String[] symbolicNames) { try { MessageDigest digest = MessageDigest.getInstance("SHA-256"); updateDigest(digest, lexerATN); updateDigest(digest, parserATN); + updateDigest(digest, lexerRuleNames); + updateDigest(digest, parserRuleNames); + updateDigest(digest, literalNames); + updateDigest(digest, symbolicNames); digest.update(ANTLR_VERSION.getBytes(StandardCharsets.UTF_8)); byte[] hash = digest.digest(); // Output is always "sha256:" (7 chars) + 64 hex chars = 71 chars. @@ -203,4 +234,17 @@ private static void updateDigest(MessageDigest digest, int[] data) { digest.update((byte) (v)); } } + + private static void updateDigest(MessageDigest digest, String[] data) { + for (String value : data) { + if (value == null) { + digest.update((byte) 0); + } else { + digest.update((byte) 1); + digest.update(value.getBytes(StandardCharsets.UTF_8)); + } + // field separator to avoid concatenation ambiguities + digest.update((byte) 0xFF); + } + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java index fb5dd4dac9b..590fbe76507 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java @@ -9,6 +9,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.util.Arrays; import java.util.HashSet; @@ -226,6 +227,40 @@ public void testNewerGrammarKeywordsAreNotIgnoredWhenPresent() { assertTokenNotIgnoredIfPresent("USE_PIT"); } + @Test + public void testArrayGettersAreDefensiveCopies() { + int[] ignoredBefore = bundle.getIgnoredTokens(); + int original = ignoredBefore[0]; + ignoredBefore[0] = -1; + assertEquals( + "ignoredTokens getter should return a defensive copy", + original, + bundle.getIgnoredTokens()[0]); + + String[] symbolsBefore = bundle.getSymbolicNames(); + String originalSymbol = symbolsBefore[1]; + symbolsBefore[1] = "MUTATED"; + assertEquals( + "symbolicNames getter should return a defensive copy", + originalSymbol, + bundle.getSymbolicNames()[1]); + } + + @Test + public void testTokenDictionaryGetterIsUnmodifiableCopy() { + Map dict = bundle.getTokenDictionary(); + Integer pipeBefore = dict.get("PIPE"); + + try { + dict.put("PIPE", -1); + fail("tokenDictionary getter should return an unmodifiable map"); + } catch (UnsupportedOperationException expected) { + // expected + } + + assertEquals(pipeBefore, bundle.getTokenDictionary().get("PIPE")); + } + private static Set ignoredTokenSet() { Set ignored = new HashSet<>(); for (int tokenType : bundle.getIgnoredTokens()) { @@ -237,7 +272,8 @@ private static Set ignoredTokenSet() { private static void assertTokenNotIgnoredIfPresent(String symbolicTokenName) { int tokenType = tokenTypeBySymbolicName(symbolicTokenName); if (tokenType >= 0) { - assertFalse(symbolicTokenName + " should not be ignored", ignoredTokenSet().contains(tokenType)); + assertFalse( + symbolicTokenName + " should not be ignored", ignoredTokenSet().contains(tokenType)); } } From 2350fd77f3b8c5d01193c5b611b07bf1ad673fde Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Tue, 3 Mar 2026 07:23:42 -0800 Subject: [PATCH 17/20] fix test duplication Signed-off-by: Eric Wei --- .../plugin/rest/RestPPLGrammarActionTest.java | 49 ++++++------------- 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index 575c1db6b92..cb239ce33e1 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -54,11 +54,7 @@ public void testRoutes() { @Test public void testGetGrammar_ReturnsBundle() throws Exception { - FakeRestRequest request = - new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) - .withMethod(RestRequest.Method.GET) - .withPath("/_plugins/_ppl/_grammar") - .build(); + FakeRestRequest request = newGrammarGetRequest(); MockRestChannel channel = new MockRestChannel(request, true); action.handleRequest(request, channel, client); @@ -112,19 +108,11 @@ protected GrammarBundle buildBundle() { } }; - FakeRestRequest request1 = - new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) - .withMethod(RestRequest.Method.GET) - .withPath("/_plugins/_ppl/_grammar") - .build(); + FakeRestRequest request1 = newGrammarGetRequest(); MockRestChannel channel1 = new MockRestChannel(request1, true); countingAction.handleRequest(request1, channel1, client); - FakeRestRequest request2 = - new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) - .withMethod(RestRequest.Method.GET) - .withPath("/_plugins/_ppl/_grammar") - .build(); + FakeRestRequest request2 = newGrammarGetRequest(); MockRestChannel channel2 = new MockRestChannel(request2, true); countingAction.handleRequest(request2, channel2, client); @@ -147,11 +135,7 @@ protected GrammarBundle buildBundle() { } }; - FakeRestRequest request1 = - new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) - .withMethod(RestRequest.Method.GET) - .withPath("/_plugins/_ppl/_grammar") - .build(); + FakeRestRequest request1 = newGrammarGetRequest(); MockRestChannel channel1 = new MockRestChannel(request1, true); countingAction.handleRequest(request1, channel1, client); assertEquals(RestStatus.OK, channel1.getResponse().status()); @@ -159,11 +143,7 @@ protected GrammarBundle buildBundle() { countingAction.invalidateCache(); - FakeRestRequest request2 = - new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) - .withMethod(RestRequest.Method.GET) - .withPath("/_plugins/_ppl/_grammar") - .build(); + FakeRestRequest request2 = newGrammarGetRequest(); MockRestChannel channel2 = new MockRestChannel(request2, true); countingAction.handleRequest(request2, channel2, client); assertEquals(RestStatus.OK, channel2.getResponse().status()); @@ -180,11 +160,7 @@ protected GrammarBundle buildBundle() { } }; - FakeRestRequest request = - new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) - .withMethod(RestRequest.Method.GET) - .withPath("/_plugins/_ppl/_grammar") - .build(); + FakeRestRequest request = newGrammarGetRequest(); MockRestChannel channel = new MockRestChannel(request, true); failingAction.handleRequest(request, channel, client); @@ -201,17 +177,20 @@ protected GrammarBundle buildBundle() { } }; - FakeRestRequest request = - new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) - .withMethod(RestRequest.Method.GET) - .withPath("/_plugins/_ppl/_grammar") - .build(); + FakeRestRequest request = newGrammarGetRequest(); MockRestChannel channel = new MockRestChannel(request, true); nullBundleAction.handleRequest(request, channel, client); assertEquals(RestStatus.INTERNAL_SERVER_ERROR, channel.getResponse().status()); } + private static FakeRestRequest newGrammarGetRequest() { + return new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) + .withMethod(RestRequest.Method.GET) + .withPath("/_plugins/_ppl/_grammar") + .build(); + } + /** Mock RestChannel to capture responses */ private static class MockRestChannel implements RestChannel { private final RestRequest request; From faac85bc0ba9c315368c89b31add06b547bc3395 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Tue, 3 Mar 2026 07:58:48 -0800 Subject: [PATCH 18/20] Polish grammar bundle builder and stabilize grammar endpoint doctest Signed-off-by: Eric Wei --- docs/user/ppl/interfaces/endpoint.md | 24 ++++++------------- .../autocomplete/PPLGrammarBundleBuilder.java | 18 +++----------- 2 files changed, 10 insertions(+), 32 deletions(-) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index b52412880db..3f87180dca6 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -211,25 +211,15 @@ You can send an HTTP GET request to endpoint **/_plugins/_ppl/_grammar** to fetc ### Example ```bash -curl -sS -X GET localhost:9200/_plugins/_ppl/_grammar +curl -sS -X GET localhost:9200/_plugins/_ppl/_grammar | python3 -m json.tool | grep -E '"bundleVersion"|"antlrVersion"|"startRuleIndex"|"ignoredTokens"|"rulesToVisit"' ``` Expected output (trimmed): -```json -{ - "bundleVersion": "1.0", - "antlrVersion": "4.13.2", - "grammarHash": "sha256:...", - "startRuleIndex": 0, - "lexerSerializedATN": [4, ...], - "parserSerializedATN": [4, ...], - "lexerRuleNames": ["SEARCH", "..."], - "parserRuleNames": ["root", "..."], - "literalNames": [null, "'SEARCH'", "..."], - "symbolicNames": [null, "SEARCH", "..."], - "tokenDictionary": {"PIPE": 196, "...": 0}, - "ignoredTokens": [472, 473, "..."], - "rulesToVisit": [200, 201, "..."] -} +```text +"bundleVersion": "1.0", +"antlrVersion": "4.13.2", +"startRuleIndex": 0, +"ignoredTokens": [ +"rulesToVisit": [ ``` diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index 8c5f7db70ec..32d671b7e67 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -81,7 +81,7 @@ public GrammarBundle build() { .startRuleIndex(resolveStartRuleIndex(parser.getRuleNames())) .literalNames(literalNames) .symbolicNames(symbolicNames) - .tokenDictionary(buildTokenDictionary(vocabulary)) + .tokenDictionary(buildTokenDictionary()) .ignoredTokens(buildIgnoredTokens(vocabulary)) .rulesToVisit(buildRulesToVisit(parser.getRuleNames())) .build(); @@ -90,12 +90,10 @@ public GrammarBundle build() { /** * Build the token dictionary — semantic name → token type ID mapping. Uses lexer constants since * token type IDs are defined by the lexer. The frontend autocomplete enrichment uses these to - * identify tokens like SPACE, PIPE, SOURCE by name. + * identify tokens like PIPE and SOURCE by name. */ - private static Map buildTokenDictionary(Vocabulary vocabulary) { + private static Map buildTokenDictionary() { Map dict = new LinkedHashMap<>(); - // SPACE token may not exist in this grammar (whitespace may be implicitly skipped). - // Resolve by searching symbolic names; use -1 if not found. dict.put("WHITESPACE", OpenSearchPPLLexer.WHITESPACE); dict.put("FROM", OpenSearchPPLLexer.FROM); dict.put("OPENING_BRACKET", OpenSearchPPLLexer.LT_PRTHS); @@ -182,16 +180,6 @@ private static int[] buildRulesToVisit(String[] ruleNames) { return indices; } - /** Resolve a token type ID from the vocabulary by symbolic name. Returns -1 if not found. */ - private static int resolveTokenType(Vocabulary vocabulary, String name) { - for (int i = 0; i <= vocabulary.getMaxTokenType(); i++) { - if (name.equals(vocabulary.getSymbolicName(i))) { - return i; - } - } - return -1; - } - private static int resolveStartRuleIndex(String[] ruleNames) { int idx = Arrays.asList(ruleNames).indexOf("root"); return Math.max(idx, 0); From 2c5b14ebb57e0cb6fd695473bbf05bc0bfb9df6e Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Tue, 3 Mar 2026 09:03:21 -0800 Subject: [PATCH 19/20] address issue: transport action wrapper Signed-off-by: Eric Wei --- .../sql/security/PPLPermissionsIT.java | 43 +++++++++++++++ .../sql/plugin/rest/RestPPLGrammarAction.java | 50 ++++++++++++++--- .../transport/TransportPPLQueryAction.java | 10 +++- .../transport/TransportPPLQueryRequest.java | 11 +++- .../plugin/rest/RestPPLGrammarActionTest.java | 54 ++++++++++++++++++- 5 files changed, 159 insertions(+), 9 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java index d84ca2af1b3..ed25a1df2d9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java @@ -339,6 +339,23 @@ private JSONObject executeQueryAsUser(String query, String username) throws IOEx return new JSONObject(org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true)); } + /** Executes a grammar metadata request as a specific user with basic authentication. */ + private JSONObject executeGrammarAsUser(String username) throws IOException { + Request request = new Request("GET", "/_plugins/_ppl/_grammar"); + + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader( + "Authorization", + "Basic " + + java.util.Base64.getEncoder() + .encodeToString((username + ":" + STRONG_PASSWORD).getBytes())); + request.setOptions(restOptionsBuilder); + + Response response = client().performRequest(request); + assertEquals(200, response.getStatusLine().getStatusCode()); + return new JSONObject(org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true)); + } + @Test public void testUserWithBankPermissionCanAccessBankIndex() throws IOException { // Test that bank_user can access bank index - this should work with the fix @@ -512,6 +529,32 @@ public void testBankUserWithEvalCommand() throws IOException { verifyColumn(result, columnName("full_name")); } + @Test + public void testUserWithPPLPermissionCanAccessGrammarEndpoint() throws IOException { + JSONObject result = executeGrammarAsUser(BANK_USER); + assertTrue(result.has("bundleVersion")); + assertTrue(result.has("antlrVersion")); + assertTrue(result.has("grammarHash")); + assertTrue(result.has("tokenDictionary")); + } + + @Test + public void testUserWithoutPPLPermissionCannotAccessGrammarEndpoint() throws IOException { + try { + executeGrammarAsUser(NO_PPL_USER); + fail("Expected security exception for user without PPL permission"); + } catch (ResponseException e) { + assertEquals(403, e.getResponse().getStatusLine().getStatusCode()); + String responseBody = + org.opensearch.sql.legacy.TestUtils.getResponseBody(e.getResponse(), false); + assertTrue( + "Response should contain permission error message", + responseBody.contains("no permissions") + || responseBody.contains("Forbidden") + || responseBody.contains("cluster:admin/opensearch/ppl")); + } + } + // Negative test cases for missing permissions @Test diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index ff29dfebeb6..5c1b128572e 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -13,11 +13,16 @@ import java.util.List; import lombok.extern.log4j.Log4j2; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.core.action.ActionListener; import org.opensearch.core.rest.RestStatus; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.rest.BaseRestHandler; import org.opensearch.rest.BytesRestResponse; +import org.opensearch.rest.RestChannel; import org.opensearch.rest.RestRequest; +import org.opensearch.sql.plugin.transport.PPLQueryAction; +import org.opensearch.sql.plugin.transport.TransportPPLQueryRequest; +import org.opensearch.sql.plugin.transport.TransportPPLQueryResponse; import org.opensearch.sql.ppl.autocomplete.GrammarBundle; import org.opensearch.sql.ppl.autocomplete.PPLGrammarBundleBuilder; import org.opensearch.transport.client.node.NodeClient; @@ -52,17 +57,50 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli return channel -> { try { - GrammarBundle bundle = getOrBuildBundle(); - XContentBuilder builder = channel.newBuilder(); - serializeBundle(builder, bundle); - channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder)); + authorizeRequest( + client, + new ActionListener<>() { + @Override + public void onResponse(TransportPPLQueryResponse ignored) { + try { + GrammarBundle bundle = getOrBuildBundle(); + XContentBuilder builder = channel.newBuilder(); + serializeBundle(builder, bundle); + channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder)); + } catch (Exception e) { + log.error("Error building or serializing PPL grammar", e); + sendErrorResponse(channel, e); + } + } + + @Override + public void onFailure(Exception e) { + log.error("PPL grammar authorization failed", e); + sendErrorResponse(channel, e); + } + }); } catch (Exception e) { - log.error("Error building or serializing PPL grammar", e); - channel.sendResponse(new BytesRestResponse(channel, e)); + log.error("Error authorizing PPL grammar request", e); + sendErrorResponse(channel, e); } }; } + @VisibleForTesting + protected void authorizeRequest( + NodeClient client, ActionListener listener) { + client.execute( + PPLQueryAction.INSTANCE, new TransportPPLQueryRequest("", null, ENDPOINT_PATH), listener); + } + + private void sendErrorResponse(RestChannel channel, Exception e) { + try { + channel.sendResponse(new BytesRestResponse(channel, e)); + } catch (IOException ioException) { + log.error("Failed to send PPL grammar error response", ioException); + } + } + // Thread-safe lazy initialization. private synchronized GrammarBundle getOrBuildBundle() { if (cachedBundle == null) { diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java index 27bfe2084f7..48bc36374a8 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java @@ -100,13 +100,21 @@ protected void doExecute( + " false")); return; } + + TransportPPLQueryRequest transportRequest = TransportPPLQueryRequest.fromActionRequest(request); + if (transportRequest.isGrammarRequest()) { + // Authorization is enforced by this transport action before returning grammar metadata in + // REST. + listener.onResponse(new TransportPPLQueryResponse("{}")); + return; + } + Metrics.getInstance().getNumericalMetric(MetricName.PPL_REQ_TOTAL).increment(); Metrics.getInstance().getNumericalMetric(MetricName.PPL_REQ_COUNT_TOTAL).increment(); QueryContext.addRequestId(); PPLService pplService = injector.getInstance(PPLService.class); - TransportPPLQueryRequest transportRequest = TransportPPLQueryRequest.fromActionRequest(request); // in order to use PPL service, we need to convert TransportPPLQueryRequest to PPLQueryRequest PPLQueryRequest transformedRequest = transportRequest.toPPLQueryRequest(); QueryContext.setProfile(transformedRequest.profile()); diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java index e342d9a90f0..6db2bd249ae 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java @@ -119,7 +119,16 @@ public String getRequest() { * @return true if it is an explain request */ public boolean isExplainRequest() { - return path.endsWith("/_explain"); + return path != null && path.endsWith("/_explain"); + } + + /** + * Check if request is for grammar metadata endpoint. + * + * @return true if it is a grammar metadata request + */ + public boolean isGrammarRequest() { + return path != null && path.endsWith("/_grammar"); } /** Decide on the formatter by the requested format. */ diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index cb239ce33e1..92d56710c08 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -15,8 +15,10 @@ import org.json.JSONObject; import org.junit.Before; import org.junit.Test; +import org.opensearch.OpenSearchStatusException; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.action.ActionListener; import org.opensearch.core.rest.RestStatus; import org.opensearch.core.xcontent.MediaType; import org.opensearch.core.xcontent.NamedXContentRegistry; @@ -24,6 +26,7 @@ import org.opensearch.rest.RestChannel; import org.opensearch.rest.RestRequest; import org.opensearch.rest.RestResponse; +import org.opensearch.sql.plugin.transport.TransportPPLQueryResponse; import org.opensearch.sql.ppl.autocomplete.GrammarBundle; import org.opensearch.test.rest.FakeRestRequest; import org.opensearch.transport.client.node.NodeClient; @@ -36,7 +39,14 @@ public class RestPPLGrammarActionTest { @Before public void setUp() { - action = new RestPPLGrammarAction(); + action = + new RestPPLGrammarAction() { + @Override + protected void authorizeRequest( + NodeClient client, ActionListener listener) { + listener.onResponse(new TransportPPLQueryResponse("{}")); + } + }; client = mock(NodeClient.class); } @@ -101,6 +111,12 @@ public void testGetGrammar_BundleIsCached() throws Exception { AtomicInteger buildCount = new AtomicInteger(0); RestPPLGrammarAction countingAction = new RestPPLGrammarAction() { + @Override + protected void authorizeRequest( + NodeClient client, ActionListener listener) { + listener.onResponse(new TransportPPLQueryResponse("{}")); + } + @Override protected GrammarBundle buildBundle() { buildCount.incrementAndGet(); @@ -128,6 +144,12 @@ public void testInvalidateCache() throws Exception { AtomicInteger buildCount = new AtomicInteger(0); RestPPLGrammarAction countingAction = new RestPPLGrammarAction() { + @Override + protected void authorizeRequest( + NodeClient client, ActionListener listener) { + listener.onResponse(new TransportPPLQueryResponse("{}")); + } + @Override protected GrammarBundle buildBundle() { buildCount.incrementAndGet(); @@ -154,6 +176,12 @@ protected GrammarBundle buildBundle() { public void testGetGrammar_ErrorPath_Returns500() throws Exception { RestPPLGrammarAction failingAction = new RestPPLGrammarAction() { + @Override + protected void authorizeRequest( + NodeClient client, ActionListener listener) { + listener.onResponse(new TransportPPLQueryResponse("{}")); + } + @Override protected GrammarBundle buildBundle() { throw new RuntimeException("simulated build failure"); @@ -171,6 +199,12 @@ protected GrammarBundle buildBundle() { public void testGetGrammar_NullBundle_Returns500() throws Exception { RestPPLGrammarAction nullBundleAction = new RestPPLGrammarAction() { + @Override + protected void authorizeRequest( + NodeClient client, ActionListener listener) { + listener.onResponse(new TransportPPLQueryResponse("{}")); + } + @Override protected GrammarBundle buildBundle() { return null; @@ -184,6 +218,24 @@ protected GrammarBundle buildBundle() { assertEquals(RestStatus.INTERNAL_SERVER_ERROR, channel.getResponse().status()); } + @Test + public void testGetGrammar_AuthorizationFailure_Returns403() throws Exception { + RestPPLGrammarAction unauthorizedAction = + new RestPPLGrammarAction() { + @Override + protected void authorizeRequest( + NodeClient client, ActionListener listener) { + listener.onFailure(new OpenSearchStatusException("forbidden", RestStatus.FORBIDDEN)); + } + }; + + FakeRestRequest request = newGrammarGetRequest(); + MockRestChannel channel = new MockRestChannel(request, true); + unauthorizedAction.handleRequest(request, channel, client); + + assertEquals(RestStatus.FORBIDDEN, channel.getResponse().status()); + } + private static FakeRestRequest newGrammarGetRequest() { return new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) .withMethod(RestRequest.Method.GET) From da140ef88298bff330aaec261ff07c5211270746 Mon Sep 17 00:00:00 2001 From: Eric Wei Date: Tue, 3 Mar 2026 14:00:29 -0800 Subject: [PATCH 20/20] Refactor PPL grammar bundle loading to static holder singleton Signed-off-by: Eric Wei --- .../sql/plugin/rest/RestPPLGrammarAction.java | 25 ++------ .../plugin/rest/RestPPLGrammarActionTest.java | 60 ++++--------------- .../autocomplete/PPLGrammarBundleBuilder.java | 15 ++++- .../PPLGrammarBundleBuilderTest.java | 8 ++- 4 files changed, 33 insertions(+), 75 deletions(-) diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java index 5c1b128572e..f10a3e30831 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java @@ -38,9 +38,6 @@ public class RestPPLGrammarAction extends BaseRestHandler { private static final String ENDPOINT_PATH = "/_plugins/_ppl/_grammar"; - // Lazy-initialized singleton bundle (built once per JVM lifecycle) - private GrammarBundle cachedBundle; - @Override public String getName() { return "ppl_grammar_action"; @@ -63,7 +60,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli @Override public void onResponse(TransportPPLQueryResponse ignored) { try { - GrammarBundle bundle = getOrBuildBundle(); + GrammarBundle bundle = getBundle(); XContentBuilder builder = channel.newBuilder(); serializeBundle(builder, bundle); channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder)); @@ -101,24 +98,10 @@ private void sendErrorResponse(RestChannel channel, Exception e) { } } - // Thread-safe lazy initialization. - private synchronized GrammarBundle getOrBuildBundle() { - if (cachedBundle == null) { - cachedBundle = buildBundle(); - } - return cachedBundle; - } - - /** Constructs the grammar bundle. Override in tests to inject a custom or failing builder. */ - @VisibleForTesting - protected GrammarBundle buildBundle() { - return new PPLGrammarBundleBuilder().build(); - } - - /** Invalidate the cached bundle, forcing a rebuild on the next request. */ + /** Gets the grammar bundle. Override in tests to inject a custom or failing bundle provider. */ @VisibleForTesting - protected synchronized void invalidateCache() { - cachedBundle = null; + protected GrammarBundle getBundle() { + return PPLGrammarBundleBuilder.getBundle(); } private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) throws IOException { diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java index 92d56710c08..2eee8750360 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java @@ -11,7 +11,6 @@ import static org.mockito.Mockito.mock; import java.io.IOException; -import java.util.concurrent.atomic.AtomicInteger; import org.json.JSONObject; import org.junit.Before; import org.junit.Test; @@ -107,9 +106,9 @@ public void testGetGrammar_ReturnsBundle() throws Exception { } @Test - public void testGetGrammar_BundleIsCached() throws Exception { - AtomicInteger buildCount = new AtomicInteger(0); - RestPPLGrammarAction countingAction = + public void testGetGrammar_UsesBundleProvider() throws Exception { + int[] calls = {0}; + RestPPLGrammarAction providerAction = new RestPPLGrammarAction() { @Override protected void authorizeRequest( @@ -118,58 +117,21 @@ protected void authorizeRequest( } @Override - protected GrammarBundle buildBundle() { - buildCount.incrementAndGet(); - return super.buildBundle(); + protected GrammarBundle getBundle() { + calls[0]++; + return super.getBundle(); } }; FakeRestRequest request1 = newGrammarGetRequest(); MockRestChannel channel1 = new MockRestChannel(request1, true); - countingAction.handleRequest(request1, channel1, client); + providerAction.handleRequest(request1, channel1, client); FakeRestRequest request2 = newGrammarGetRequest(); MockRestChannel channel2 = new MockRestChannel(request2, true); - countingAction.handleRequest(request2, channel2, client); + providerAction.handleRequest(request2, channel2, client); - assertEquals("Bundle should be built exactly once", 1, buildCount.get()); - assertEquals( - "Consecutive requests should return identical content", - channel1.getResponse().content().utf8ToString(), - channel2.getResponse().content().utf8ToString()); - } - - @Test - public void testInvalidateCache() throws Exception { - AtomicInteger buildCount = new AtomicInteger(0); - RestPPLGrammarAction countingAction = - new RestPPLGrammarAction() { - @Override - protected void authorizeRequest( - NodeClient client, ActionListener listener) { - listener.onResponse(new TransportPPLQueryResponse("{}")); - } - - @Override - protected GrammarBundle buildBundle() { - buildCount.incrementAndGet(); - return super.buildBundle(); - } - }; - - FakeRestRequest request1 = newGrammarGetRequest(); - MockRestChannel channel1 = new MockRestChannel(request1, true); - countingAction.handleRequest(request1, channel1, client); - assertEquals(RestStatus.OK, channel1.getResponse().status()); - assertEquals("Bundle should be built on first request", 1, buildCount.get()); - - countingAction.invalidateCache(); - - FakeRestRequest request2 = newGrammarGetRequest(); - MockRestChannel channel2 = new MockRestChannel(request2, true); - countingAction.handleRequest(request2, channel2, client); - assertEquals(RestStatus.OK, channel2.getResponse().status()); - assertEquals("Bundle should be rebuilt after cache invalidation", 2, buildCount.get()); + assertEquals("Bundle provider should be invoked once per request", 2, calls[0]); } @Test @@ -183,7 +145,7 @@ protected void authorizeRequest( } @Override - protected GrammarBundle buildBundle() { + protected GrammarBundle getBundle() { throw new RuntimeException("simulated build failure"); } }; @@ -206,7 +168,7 @@ protected void authorizeRequest( } @Override - protected GrammarBundle buildBundle() { + protected GrammarBundle getBundle() { return null; } }; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java index 32d671b7e67..cd992c740a9 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java @@ -23,7 +23,7 @@ import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; /** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */ -public class PPLGrammarBundleBuilder { +public final class PPLGrammarBundleBuilder { private static final String ANTLR_VERSION = org.antlr.v4.runtime.RuntimeMetaData.getRuntimeVersion(); private static final String BUNDLE_VERSION = "1.0"; @@ -44,7 +44,18 @@ public class PPLGrammarBundleBuilder { "BLOCK_COMMENT", "ERROR_RECOGNITION")); - public GrammarBundle build() { + private PPLGrammarBundleBuilder() {} + + private static class BundleHolder { + private static final GrammarBundle INSTANCE = build(); + } + + /** Lazily builds and returns the singleton grammar bundle for this JVM. */ + public static GrammarBundle getBundle() { + return BundleHolder.INSTANCE; + } + + private static GrammarBundle build() { OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString("")); CommonTokenStream tokens = new CommonTokenStream(lexer); OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java index 590fbe76507..3792d98555c 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java @@ -8,6 +8,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -43,7 +44,7 @@ public class PPLGrammarBundleBuilderTest { @BeforeClass public static void buildBundle() { - bundle = new PPLGrammarBundleBuilder().build(); + bundle = PPLGrammarBundleBuilder.getBundle(); } @Test @@ -140,9 +141,10 @@ public void testBuildVocabularyIsNonEmpty() { @Test public void testBuildIsDeterministic() { - GrammarBundle second = new PPLGrammarBundleBuilder().build(); + GrammarBundle second = PPLGrammarBundleBuilder.getBundle(); + assertSame("Bundle accessor should return the same singleton instance", bundle, second); assertEquals( - "Two builds of the same grammar should produce the same hash", + "Repeated accesses should produce the same hash", bundle.getGrammarHash(), second.getGrammarHash()); }