From 039e7e23c0469efb23726a36ed805e14834bf652 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Thu, 19 Feb 2026 08:25:40 -0800
Subject: [PATCH 01/20] initial commit for grammar API

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../autocomplete/AutocompleteArtifact.java    | 121 ++++++++
 .../autocomplete/GrammarArtifactBuilder.java  | 202 ++++++++++++++
 .../org/opensearch/sql/plugin/SQLPlugin.java  |   2 +
 .../sql/plugin/rest/RestPPLGrammarAction.java | 228 +++++++++++++++
 .../plugin/rest/RestPPLGrammarActionTest.java | 260 ++++++++++++++++++
 .../PPLAutocompleteArtifactBuilder.java       | 122 ++++++++
 6 files changed, 935 insertions(+)
 create mode 100644 core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java
 create mode 100644 core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java
 create mode 100644 plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
 create mode 100644 plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
 create mode 100644 ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java
diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java
new file mode 100644
index 00000000000..38b87765cbd
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.executor.autocomplete;
+
+import java.util.Map;
+import lombok.Builder;
+import lombok.Data;
+
+/**
+ * Autocomplete artifact bundle containing everything needed for client-side grammar-based
+ * autocomplete.
+ *
+ * <p>This bundle is language-agnostic and can be used for PPL, SQL, or any ANTLR-based language. It
+ * contains:
+ *
+ * <ul>
+ *   <li>Serialized ATN data for lexer and parser (for antlr4ng runtime)
+ *   <li>Vocabulary and rule names (for token/rule interpretation)
+ *   <li>Static catalogs (commands, functions, keywords, snippets)
+ *   <li>Token classification mapping (for suggestion categorization)
+ * </ul>
+ *
+ * <p>Frontend uses this bundle to:
+ *
+ * <ol>
+ *   <li>Deserialize ATNs with antlr4ng
+ *   <li>Create LexerInterpreter and ParserInterpreter
+ *   <li>Use antlr4-c3 to find valid tokens at cursor
+ *   <li>Generate suggestions from catalogs
+ * </ol>
+ */
+@Data
+@Builder
+public class AutocompleteArtifact {
+
+  // ============================================================================
+  // Identity & versioning
+  // ============================================================================
+
+  /** Bundle version (increment when format changes) */
+  private String bundleVersion;
+
+  /**
+   * Hash of grammar sources + ANTLR version. Used for cache validation via ETag. Format:
+   * "sha256:abc123..."
+   */
+  private String grammarHash;
+
+  // ============================================================================
+  // Lexer ATN & metadata
+  // ============================================================================
+
+  /**
+   * Serialized lexer ATN as int array. Frontend uses directly: new
+   * ATNDeserializer().deserialize(lexerSerializedATN)
+   */
+  private int[] lexerSerializedATN;
+
+  /** Lexer rule names (e.g., ["SEARCH", "WHERE", "PIPE", ...]) */
+  private String[] lexerRuleNames;
+
+  /** Channel names (e.g., ["DEFAULT_TOKEN_CHANNEL", "WHITESPACE", "ERRORCHANNEL"]) */
+  private String[] channelNames;
+
+  /** Mode names (e.g., ["DEFAULT_MODE"]) */
+  private String[] modeNames;
+
+  // ============================================================================
+  // Parser ATN & metadata
+  // ============================================================================
+
+  /**
+   * Serialized parser ATN as int array. Frontend uses directly: new
+   * ATNDeserializer().deserialize(parserSerializedATN)
+   */
+  private int[] parserSerializedATN;
+
+  /** Parser rule names (e.g., ["root", "pplStatement", "commands", ...]) */
+  private String[] parserRuleNames;
+
+  /** Start rule index (usually 0 for "root" rule) */
+  private int startRuleIndex;
+
+  // ============================================================================
+  // Vocabulary
+  // ============================================================================
+
+  /**
+   * Literal names from vocabulary. Index = token type. Values are literal tokens with quotes, or
+   * null. Example: ["<INVALID>", "'search'", "'where'", "'|'", null, null, ...]
+   */
+  private String[] literalNames;
+
+  /**
+   * Symbolic names from vocabulary. Index = token type. Values are token symbolic names, or null.
+   * Example: ["<INVALID>", "SEARCH", "WHERE", "PIPE", "ID", "INTEGER", ...]
+   */
+  private String[] symbolicNames;
+
+  /**
+   * Optional display names (user-friendly token names). If not provided, frontend uses literal or
+   * symbolic names.
+   */
+  private String[] displayNames;
+
+  // ============================================================================
+  // Token classification
+  // ============================================================================
+
+  /**
+   * Mapping from token symbolic name to suggestion category. Used by frontend to classify antlr4-c3
+   * token candidates into suggestion types.
+   *
+   * <p>Example: { "SEARCH": "COMMAND", "WHERE": "COMMAND", "BY": "KEYWORD", "COUNT": "FUNCTION",
+   * "AND": "OPERATOR" }
+   */
+  private Map<String, String> tokenTypeToCategory;
+}
diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java
new file mode 100644
index 00000000000..5bbffea1e9e
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java
@@ -0,0 +1,202 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.executor.autocomplete;
+
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import lombok.extern.log4j.Log4j2;
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.Vocabulary;
+
+/**
+ * Utility class for extracting ANTLR grammar artifacts (ATN, vocabulary, rule names) from generated
+ * parser/lexer classes.
+ *
+ * <p>This class handles the low-level details of:
+ *
+ * <ul>
+ *   <li>Converting ANTLR's Java String ATN format to int[] for JSON transfer
+ *   <li>Extracting vocabulary (literal and symbolic names)
+ *   <li>Extracting rule names via public ANTLR APIs
+ *   <li>Computing grammar hash for versioning
+ * </ul>
+ *
+ * <p>Language-specific builders (PPL, SQL) use this class to build their autocomplete bundles.
+ */
+@Log4j2
+public class GrammarArtifactBuilder {
+
+  /**
+   * Extract literal names from vocabulary.
+   *
+   * <p>Returns array where index = token type, value = literal token (with quotes) or null.
+   *
+   * <p>Example: ["<INVALID>", "'search'", "'where'", "'|'", null, ...]
+   *
+   * @param vocabulary Parser vocabulary
+   * @return Array of literal names
+   */
+  public static String[] extractLiteralNames(Vocabulary vocabulary) {
+    int maxTokenType = vocabulary.getMaxTokenType();
+    String[] names = new String[maxTokenType + 1];
+
+    for (int i = 0; i <= maxTokenType; i++) {
+      String literal = vocabulary.getLiteralName(i);
+      // Keep nulls as nulls (no literal representation)
+      names[i] = literal;
+    }
+
+    log.debug("Extracted {} literal names", names.length);
+    return names;
+  }
+
+  /**
+   * Extract symbolic names from vocabulary.
+   *
+   * <p>Returns array where index = token type, value = symbolic token name or null.
+   *
+   * <p>Example: ["<INVALID>", "SEARCH", "WHERE", "PIPE", "ID", ...]
+   *
+   * @param vocabulary Parser vocabulary
+   * @return Array of symbolic names
+   */
+  public static String[] extractSymbolicNames(Vocabulary vocabulary) {
+    int maxTokenType = vocabulary.getMaxTokenType();
+    String[] names = new String[maxTokenType + 1];
+
+    for (int i = 0; i <= maxTokenType; i++) {
+      String symbolic = vocabulary.getSymbolicName(i);
+      // Keep nulls as nulls (no symbolic name)
+      names[i] = symbolic;
+    }
+
+    log.debug("Extracted {} symbolic names", names.length);
+    return names;
+  }
+
+  /**
+   * Extract rule names from parser.
+   *
+   * <p>Parser.getRuleNames() is public API.
+   *
+   * @param parser Parser instance
+   * @return Array of rule names
+   */
+  public static String[] extractParserRuleNames(Parser parser) {
+    String[] ruleNames = parser.getRuleNames();
+    log.debug("Extracted {} parser rule names", ruleNames.length);
+    return ruleNames;
+  }
+
+  /**
+   * Extract rule names from lexer.
+   *
+   * <p>Lexer.getRuleNames() is public API (Lexer extends Recognizer).
+   *
+   * @param lexer Lexer instance
+   * @return Array of lexer rule names
+   */
+  public static String[] extractLexerRuleNames(Lexer lexer) {
+    String[] ruleNames = lexer.getRuleNames();
+    log.debug("Extracted {} lexer rule names", ruleNames.length);
+    return ruleNames;
+  }
+
+  /**
+   * Extract channel names from lexer.
+   *
+   * <p>ANTLR 4.x exposes channel names via getChannelNames() method in generated lexers.
+   * This method dynamically extracts the actual channel names from the lexer instance.
+   *
+   * @param lexer Lexer instance
+   * @return Array of channel names
+   */
+  public static String[] extractChannelNames(Lexer lexer) {
+    String[] channelNames = lexer.getChannelNames();
+    log.debug("Extracted {} channel names from lexer", channelNames.length);
+    return channelNames;
+  }
+
+  /**
+   * Extract mode names from lexer.
+   *
+   * <p>ANTLR 4.x exposes mode names via getModeNames() method in generated lexers.
+   * This method dynamically extracts the actual mode names from the lexer instance.
+   *
+   * @param lexer Lexer instance
+   * @return Array of mode names
+   */
+  public static String[] extractModeNames(Lexer lexer) {
+    String[] modeNames = lexer.getModeNames();
+    log.debug("Extracted {} mode names from lexer", modeNames.length);
+    return modeNames;
+  }
+
+  /**
+   * Compute grammar hash from ATN data (recommended).
+   *
+   * <p>This method hashes the serialized ATN arrays directly, which:
+   *
+   * <ul>
+   *   <li>Always available at runtime (no classpath dependencies)
+   *   <li>Reflects the actual artifact being served
+   *   <li>Changes when grammar changes (ATN structure changes)
+   * </ul>
+   *
+   * @param lexerATN Serialized lexer ATN as int array
+   * @param parserATN Serialized parser ATN as int array
+   * @param antlrVersion ANTLR tool version (e.g., "4.13.2")
+   * @return Hash string in format "sha256:abc123..."
+   */
+  public static String computeGrammarHash(int[] lexerATN, int[] parserATN, String antlrVersion) {
+    try {
+      MessageDigest digest = MessageDigest.getInstance("SHA-256");
+
+      // Hash lexer ATN data
+      for (int value : lexerATN) {
+        digest.update((byte) (value >> 8));
+        digest.update((byte) value);
+      }
+
+      // Hash parser ATN data
+      for (int value : parserATN) {
+        digest.update((byte) (value >> 8));
+        digest.update((byte) value);
+      }
+
+      // Hash ANTLR version to detect generator changes
+      digest.update(antlrVersion.getBytes(StandardCharsets.UTF_8));
+
+      // Compute hash
+      byte[] hashBytes = digest.digest();
+      String result = "sha256:" + bytesToHex(hashBytes);
+
+      log.info("Computed grammar hash from ATN data: {}", result);
+      return result;
+
+    } catch (NoSuchAlgorithmException e) {
+      // SHA-256 is required by Java specification, this should never happen
+      throw new IllegalStateException("SHA-256 algorithm not available", e);
+    }
+  }
+
+
+  /**
+   * Convert byte array to hex string.
+   *
+   * @param bytes Input bytes
+   * @return Hex string (lowercase)
+   */
+  private static String bytesToHex(byte[] bytes) {
+    StringBuilder sb = new StringBuilder(bytes.length * 2);
+    for (byte b : bytes) {
+      sb.append(String.format("%02x", b & 0xFF));
+    }
+    return sb.toString();
+  }
+}
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java
index d817e13c69f..edffd65f6bf 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java
@@ -94,6 +94,7 @@
 import org.opensearch.sql.opensearch.storage.OpenSearchDataSourceFactory;
 import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine;
 import org.opensearch.sql.plugin.config.OpenSearchPluginModule;
+import org.opensearch.sql.plugin.rest.RestPPLGrammarAction;
 import org.opensearch.sql.plugin.rest.RestPPLQueryAction;
 import org.opensearch.sql.plugin.rest.RestPPLStatsAction;
 import org.opensearch.sql.plugin.rest.RestQuerySettingsAction;
@@ -163,6 +164,7 @@ public List<RestHandler> getRestHandlers(
 
     return Arrays.asList(
         new RestPPLQueryAction(),
+        new RestPPLGrammarAction(),
         new RestSqlAction(settings, injector),
         new RestSqlStatsAction(settings, restController),
         new RestPPLStatsAction(settings, restController),
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
new file mode 100644
index 00000000000..c8908cd7228
--- /dev/null
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -0,0 +1,228 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.plugin.rest;
+
+import static org.opensearch.rest.RestRequest.Method.GET;
+
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.util.List;
+import lombok.extern.log4j.Log4j2;
+import org.opensearch.core.rest.RestStatus;
+import org.opensearch.core.xcontent.XContentBuilder;
+import org.opensearch.rest.BaseRestHandler;
+import org.opensearch.rest.BytesRestResponse;
+import org.opensearch.rest.RestRequest;
+import org.opensearch.sql.executor.autocomplete.AutocompleteArtifact;
+import org.opensearch.sql.ppl.autocomplete.PPLAutocompleteArtifactBuilder;
+import org.opensearch.transport.client.node.NodeClient;
+
+/**
+ * REST handler for PPL grammar metadata endpoint.
+ *
+ * <p>Endpoint: GET /_plugins/_ppl/_grammar
+ *
+ * <p>Returns grammar data for client-side parsing, autocomplete, syntax highlighting, etc:
+ *
+ * <ul>
+ *   <li>Serialized ANTLR ATN data (lexer + parser)
+ *   <li>Vocabulary (literal and symbolic names)
+ *   <li>Rule names (parser and lexer)
+ *   <li>Channel and mode names
+ * </ul>
+ *
+ * <p>The bundle is cached in memory and versioned via ETag (grammar hash). Clients should cache
+ * locally and only refetch when ETag changes.
+ *
+ * <p>Example request:
+ *
+ * <pre>
+ * GET /_plugins/_ppl/_grammar
+ * If-None-Match: "sha256:abc123..."
+ * </pre>
+ *
+ * <p>Response headers:
+ *
+ * <pre>
+ * HTTP/1.1 200 OK
+ * ETag: "sha256:abc123..."
+ * Cache-Control: public, max-age=3600
+ * Content-Type: application/json
+ * </pre>
+ *
+ * <p>Or if client has latest:
+ *
+ * <pre>
+ * HTTP/1.1 304 Not Modified
+ * ETag: "sha256:abc123..."
+ * </pre>
+ */
+@Log4j2
+public class RestPPLGrammarAction extends BaseRestHandler {
+
+  private static final String ENDPOINT_PATH = "/_plugins/_ppl/_grammar";
+
+  // Lazy-initialized singleton artifact (built once per JVM lifecycle)
+  private volatile AutocompleteArtifact cachedArtifact;
+  private final Object artifactLock = new Object();
+
+  @Override
+  public String getName() {
+    return "ppl_grammar_action";
+  }
+
+  @Override
+  public List<Route> routes() {
+    return ImmutableList.of(new Route(GET, ENDPOINT_PATH));
+  }
+
+  @Override
+  protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client)
+      throws IOException {
+
+    log.debug("Received PPL grammar request");
+
+    return channel -> {
+      try {
+        // Get or build artifact (lazy initialization)
+        AutocompleteArtifact artifact = getOrBuildArtifact();
+
+        String grammarHash = artifact.getGrammarHash();
+        String ifNoneMatch = request.header("If-None-Match");
+
+        // Strip quotes from ETag if present ("sha256:..." → sha256:...)
+        if (ifNoneMatch != null && ifNoneMatch.startsWith("\"") && ifNoneMatch.endsWith("\"")) {
+          ifNoneMatch = ifNoneMatch.substring(1, ifNoneMatch.length() - 1);
+        }
+
+        // Return 304 Not Modified if client has latest version
+        if (grammarHash.equals(ifNoneMatch)) {
+          log.debug("Client has latest grammar (ETag match), returning 304");
+          BytesRestResponse response =
+              new BytesRestResponse(RestStatus.NOT_MODIFIED, "application/json", "");
+          response.addHeader("ETag", "\"" + grammarHash + "\"");
+          response.addHeader("Cache-Control", "public, max-age=3600");
+          channel.sendResponse(response);
+          return;
+        }
+
+        // Serialize artifact to JSON
+        log.debug("Serializing grammar to JSON (hash: {})", grammarHash);
+        XContentBuilder builder = channel.newBuilder();
+        serializeArtifact(builder, artifact);
+
+        BytesRestResponse response = new BytesRestResponse(RestStatus.OK, builder);
+
+        // Add caching headers
+        response.addHeader("ETag", "\"" + grammarHash + "\"");
+        response.addHeader("Cache-Control", "public, max-age=3600");
+        response.addHeader("Content-Type", "application/json; charset=UTF-8");
+
+        log.info("Returning PPL grammar (size: {} bytes)", response.content().length());
+        channel.sendResponse(response);
+
+      } catch (Exception e) {
+        log.error("Error building or serializing PPL grammar", e);
+        channel.sendResponse(new BytesRestResponse(channel, RestStatus.INTERNAL_SERVER_ERROR, e));
+      }
+    };
+  }
+
+  /**
+   * Get cached artifact or build it if not initialized.
+   *
+   * <p>Thread-safe lazy initialization with double-checked locking.
+   */
+  private AutocompleteArtifact getOrBuildArtifact() {
+    // First check without lock (common case: already initialized)
+    if (cachedArtifact != null) {
+      return cachedArtifact;
+    }
+
+    // Acquire lock for initialization
+    synchronized (artifactLock) {
+      // Double-check after acquiring lock
+      if (cachedArtifact == null) {
+        log.info("Building PPL grammar artifact (first request)...");
+        long startTime = System.currentTimeMillis();
+
+        PPLAutocompleteArtifactBuilder builder = new PPLAutocompleteArtifactBuilder();
+        cachedArtifact = builder.buildArtifact();
+
+        long elapsed = System.currentTimeMillis() - startTime;
+        log.info("Built PPL grammar in {}ms (hash: {})", elapsed, cachedArtifact.getGrammarHash());
+      }
+      return cachedArtifact;
+    }
+  }
+
+  /**
+   * Invalidate cached artifact (for testing or grammar updates).
+   *
+   * <p>Note: This only affects the current node. In a multi-node cluster, each node maintains its
+   * own cache.
+   */
+  public void invalidateCache() {
+    synchronized (artifactLock) {
+      log.info("Invalidating cached PPL grammar");
+      cachedArtifact = null;
+    }
+  }
+
+  /**
+   * Manually serialize AutocompleteArtifact to JSON.
+   *
+   * <p>Outputs ATN data and arrays as standard JSON arrays.
+   */
+  private void serializeArtifact(XContentBuilder builder, AutocompleteArtifact artifact)
+      throws IOException {
+    builder.startObject();
+
+    // Identity & versioning
+    builder.field("bundleVersion", artifact.getBundleVersion());
+    builder.field("grammarHash", artifact.getGrammarHash());
+    builder.field("startRuleIndex", artifact.getStartRuleIndex());
+
+    // Lexer ATN & metadata
+    if (artifact.getLexerSerializedATN() != null) {
+      builder.field("lexerSerializedATN", artifact.getLexerSerializedATN());
+      log.debug("Lexer ATN: {} elements", artifact.getLexerSerializedATN().length);
+    }
+
+    if (artifact.getLexerRuleNames() != null) {
+      builder.field("lexerRuleNames", artifact.getLexerRuleNames());
+    }
+
+    if (artifact.getChannelNames() != null) {
+      builder.field("channelNames", artifact.getChannelNames());
+    }
+
+    if (artifact.getModeNames() != null) {
+      builder.field("modeNames", artifact.getModeNames());
+    }
+
+    // Parser ATN & metadata
+    if (artifact.getParserSerializedATN() != null) {
+      builder.field("parserSerializedATN", artifact.getParserSerializedATN());
+      log.debug("Parser ATN: {} elements", artifact.getParserSerializedATN().length);
+    }
+
+    if (artifact.getParserRuleNames() != null) {
+      builder.field("parserRuleNames", artifact.getParserRuleNames());
+    }
+
+    // Vocabulary
+    if (artifact.getLiteralNames() != null) {
+      builder.field("literalNames", artifact.getLiteralNames());
+    }
+
+    if (artifact.getSymbolicNames() != null) {
+      builder.field("symbolicNames", artifact.getSymbolicNames());
+    }
+
+    builder.endObject();
+  }
+}
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
new file mode 100644
index 00000000000..cfcd8c9892e
--- /dev/null
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -0,0 +1,260 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.plugin.rest;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import org.junit.Before;
+import org.junit.Test;
+import org.opensearch.core.rest.RestStatus;
+import org.opensearch.core.xcontent.NamedXContentRegistry;
+import org.opensearch.rest.RestChannel;
+import org.opensearch.rest.RestRequest;
+import org.opensearch.rest.RestResponse;
+import org.opensearch.test.OpenSearchTestCase;
+import org.opensearch.test.rest.FakeRestRequest;
+import org.opensearch.transport.client.node.NodeClient;
+
+/**
+ * Unit tests for RestPPLGrammarAction.
+ *
+ * <p>Tests:
+ *
+ * <ul>
+ *   <li>200 OK response with grammar bundle
+ *   <li>304 Not Modified when ETag matches
+ *   <li>Grammar structure validation
+ *   <li>Cache behavior
+ * </ul>
+ */
+public class RestPPLGrammarActionTest extends OpenSearchTestCase {
+
+  private RestPPLGrammarAction action;
+  private NodeClient client;
+
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    action = new RestPPLGrammarAction();
+    client = mock(NodeClient.class);
+  }
+
+  @Test
+  public void testName() {
+    assertEquals("ppl_grammar_action", action.getName());
+  }
+
+  @Test
+  public void testRoutes() {
+    assertEquals(1, action.routes().size());
+    assertEquals(RestRequest.Method.GET, action.routes().get(0).getMethod());
+    assertEquals("/_plugins/_ppl/_grammar", action.routes().get(0).getPath());
+  }
+
+  @Test
+  public void testGetGrammar_ReturnsBundle() throws Exception {
+    // Create request without ETag
+    FakeRestRequest request =
+        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
+            .withMethod(RestRequest.Method.GET)
+            .withPath("/_plugins/_ppl/_grammar")
+            .withHeaders(Collections.emptyMap())
+            .build();
+
+    // Create mock channel to capture response
+    MockRestChannel channel = new MockRestChannel(request, true);
+
+    // Execute request
+    action.prepareRequest(request, client).accept(channel);
+
+    // Verify response
+    RestResponse response = channel.getResponse();
+    assertNotNull("Response should not be null", response);
+    assertEquals("Should return 200 OK", RestStatus.OK, response.status());
+
+    // Verify ETag header present
+    String etag = response.getHeaders().get("ETag").get(0);
+    assertNotNull("ETag header should be present", etag);
+    assertTrue("ETag should start with sha256:", etag.contains("sha256:"));
+
+    // Verify Cache-Control header
+    String cacheControl = response.getHeaders().get("Cache-Control").get(0);
+    assertNotNull("Cache-Control header should be present", cacheControl);
+    assertTrue("Should allow caching", cacheControl.contains("max-age=3600"));
+
+    // Verify response content
+    String content = new String(response.content().array(), StandardCharsets.UTF_8);
+    assertTrue("Should contain bundleVersion", content.contains("\"bundleVersion\":"));
+    assertTrue("Should contain grammarHash", content.contains("\"grammarHash\":"));
+    assertTrue("Should contain lexerSerializedATN", content.contains("\"lexerSerializedATN\":"));
+    assertTrue("Should contain parserSerializedATN", content.contains("\"parserSerializedATN\":"));
+    assertTrue("Should contain literalNames", content.contains("\"literalNames\":"));
+    assertTrue("Should contain symbolicNames", content.contains("\"symbolicNames\":"));
+  }
+
+  @Test
+  public void testGetGrammar_Returns304WhenETagMatches() throws Exception {
+    // First request to get ETag
+    FakeRestRequest firstRequest =
+        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
+            .withMethod(RestRequest.Method.GET)
+            .withPath("/_plugins/_ppl/_grammar")
+            .build();
+
+    MockRestChannel firstChannel = new MockRestChannel(firstRequest, true);
+    action.prepareRequest(firstRequest, client).accept(firstChannel);
+
+    RestResponse firstResponse = firstChannel.getResponse();
+    String etag = firstResponse.getHeaders().get("ETag").get(0);
+
+    // Second request with matching ETag
+    FakeRestRequest secondRequest =
+        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
+            .withMethod(RestRequest.Method.GET)
+            .withPath("/_plugins/_ppl/_grammar")
+            .withHeaders(Collections.singletonMap("If-None-Match", Collections.singletonList(etag)))
+            .build();
+
+    MockRestChannel secondChannel = new MockRestChannel(secondRequest, true);
+    action.prepareRequest(secondRequest, client).accept(secondChannel);
+
+    RestResponse secondResponse = secondChannel.getResponse();
+    assertEquals(
+        "Should return 304 Not Modified when ETag matches",
+        RestStatus.NOT_MODIFIED,
+        secondResponse.status());
+  }
+
+  @Test
+  public void testGetGrammar_ArtifactIsCached() throws Exception {
+    // Make two requests
+    FakeRestRequest request1 =
+        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
+            .withMethod(RestRequest.Method.GET)
+            .withPath("/_plugins/_ppl/_grammar")
+            .build();
+
+    MockRestChannel channel1 = new MockRestChannel(request1, true);
+    long startTime1 = System.currentTimeMillis();
+    action.prepareRequest(request1, client).accept(channel1);
+    long elapsed1 = System.currentTimeMillis() - startTime1;
+
+    // Second request should be faster (cached)
+    FakeRestRequest request2 =
+        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
+            .withMethod(RestRequest.Method.GET)
+            .withPath("/_plugins/_ppl/_grammar")
+            .build();
+
+    MockRestChannel channel2 = new MockRestChannel(request2, true);
+    long startTime2 = System.currentTimeMillis();
+    action.prepareRequest(request2, client).accept(channel2);
+    long elapsed2 = System.currentTimeMillis() - startTime2;
+
+    // Both should return same ETag
+    String etag1 = channel1.getResponse().getHeaders().get("ETag").get(0);
+    String etag2 = channel2.getResponse().getHeaders().get("ETag").get(0);
+    assertEquals("ETags should match", etag1, etag2);
+
+    // Second request should be faster (artifact cached)
+    assertTrue(
+        "Second request should be faster due to caching (elapsed1="
+            + elapsed1
+            + "ms, elapsed2="
+            + elapsed2
+            + "ms)",
+        elapsed2 < elapsed1 || elapsed2 < 50); // Allow some variance
+  }
+
+  @Test
+  public void testInvalidateCache() throws Exception {
+    // Get grammar
+    FakeRestRequest request1 =
+        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
+            .withMethod(RestRequest.Method.GET)
+            .withPath("/_plugins/_ppl/_grammar")
+            .build();
+
+    MockRestChannel channel1 = new MockRestChannel(request1, true);
+    action.prepareRequest(request1, client).accept(channel1);
+    String etag1 = channel1.getResponse().getHeaders().get("ETag").get(0);
+
+    // Invalidate cache
+    action.invalidateCache();
+
+    // Get grammar again
+    FakeRestRequest request2 =
+        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
+            .withMethod(RestRequest.Method.GET)
+            .withPath("/_plugins/_ppl/_grammar")
+            .build();
+
+    MockRestChannel channel2 = new MockRestChannel(request2, true);
+    action.prepareRequest(request2, client).accept(channel2);
+    String etag2 = channel2.getResponse().getHeaders().get("ETag").get(0);
+
+    // ETags should still match (same grammar)
+    assertEquals("ETags should match after cache invalidation", etag1, etag2);
+  }
+
+  /** Mock RestChannel to capture responses */
+  private static class MockRestChannel implements RestChannel {
+    private final RestRequest request;
+    private final boolean detailedErrorsEnabled;
+    private RestResponse response;
+
+    MockRestChannel(RestRequest request, boolean detailedErrorsEnabled) {
+      this.request = request;
+      this.detailedErrorsEnabled = detailedErrorsEnabled;
+    }
+
+    @Override
+    public void sendResponse(RestResponse response) {
+      this.response = response;
+    }
+
+    public RestResponse getResponse() {
+      return response;
+    }
+
+    @Override
+    public RestRequest request() {
+      return request;
+    }
+
+    @Override
+    public boolean detailedErrorsEnabled() {
+      return detailedErrorsEnabled;
+    }
+
+    @Override
+    public org.opensearch.core.xcontent.XContentBuilder newBuilder() {
+      return null;
+    }
+
+    @Override
+    public org.opensearch.core.xcontent.XContentBuilder newErrorBuilder() {
+      return null;
+    }
+
+    @Override
+    public org.opensearch.core.xcontent.XContentBuilder newBuilder(
+        org.opensearch.core.xcontent.MediaType mediaType, boolean useFiltering) {
+      return null;
+    }
+
+    @Override
+    public ByteArrayOutputStream bytesOutput() {
+      return null;
+    }
+  }
+}
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java
new file mode 100644
index 00000000000..d6e11b40848
--- /dev/null
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.ppl.autocomplete;
+
+import lombok.extern.log4j.Log4j2;
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.Vocabulary;
+import org.antlr.v4.runtime.atn.ATNSerializer;
+import org.opensearch.sql.executor.autocomplete.AutocompleteArtifact;
+import org.opensearch.sql.executor.autocomplete.GrammarArtifactBuilder;
+import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer;
+import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser;
+
+/**
+ * Builds autocomplete artifact bundle for PPL language.
+ *
+ * <p>This class:
+ *
+ * <ul>
+ *   <li>Extracts ATN and vocabulary from generated PPL parser/lexer
+ *   <li>Builds PPL-specific catalogs (commands, functions, keywords)
+ *   <li>Creates token-to-category mapping for suggestion classification
+ *   <li>Computes grammar hash for versioning
+ * </ul>
+ *
+ * <p>The resulting bundle is served via REST API and cached on frontend for client-side
+ * autocomplete.
+ */
+@Log4j2
+public class PPLAutocompleteArtifactBuilder {
+
+  private static final String ANTLR_VERSION = "4.13.2";
+  private static final String BUNDLE_VERSION = "1.0";
+  private static final String RUNTIME_TARGET = "antlr4ng@3.x";
+
+  /**
+   * Build complete autocomplete artifact for PPL using optimized encoding.
+   *
+   * <p>This version uses:
+   *
+   * <ul>
+   *   <li>Base64-encoded ATN (5-15x smaller than JSON int arrays)
+   *   <li>Newline-packed string arrays (smaller than JSON arrays)
+   *   <li>NO tokenTypeToCategory map (derive from naming conventions)
+   * </ul>
+   *
+   * @return AutocompleteArtifact ready for JSON serialization
+   */
+  public AutocompleteArtifact buildArtifact() {
+    log.info("Building optimized PPL autocomplete artifact...");
+
+    // Create parser and lexer instances (with dummy input)
+    OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString(""));
+    CommonTokenStream tokens = new CommonTokenStream(lexer);
+    OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens);
+
+    // Do NOT use lexer.getSerializedATN().chars().toArray() — that gives
+    // the raw UTF-16 char encoding which has offset values and causes
+    // "state type 65535 is not valid" errors in the frontend deserializer.
+    int[] lexerATNArray = new ATNSerializer(lexer.getATN()).serialize().toArray();
+    int[] parserATNArray = new ATNSerializer(parser.getATN()).serialize().toArray();
+
+    log.info("ATN data ready:");
+    log.info("  Lexer ATN: {} elements", lexerATNArray.length);
+    log.info("  Parser ATN: {} elements", parserATNArray.length);
+
+    // Extract vocabulary
+    Vocabulary vocabulary = parser.getVocabulary();
+    String[] literalNames = GrammarArtifactBuilder.extractLiteralNames(vocabulary);
+    String[] symbolicNames = GrammarArtifactBuilder.extractSymbolicNames(vocabulary);
+
+    // Extract rule names
+    String[] parserRuleNames = GrammarArtifactBuilder.extractParserRuleNames(parser);
+    String[] lexerRuleNames = GrammarArtifactBuilder.extractLexerRuleNames(lexer);
+
+    // Extract channel/mode names from lexer
+    String[] channelNames = GrammarArtifactBuilder.extractChannelNames(lexer);
+    String[] modeNames = GrammarArtifactBuilder.extractModeNames(lexer);
+
+    // Compute grammar hash from ATN data (always available, no classpath dependency)
+    String grammarHash =
+        GrammarArtifactBuilder.computeGrammarHash(lexerATNArray, parserATNArray, ANTLR_VERSION);
+
+    // Build minimal artifact with only essential fields
+    AutocompleteArtifact artifact =
+        AutocompleteArtifact.builder()
+            .bundleVersion(BUNDLE_VERSION)
+            .grammarHash(grammarHash)
+            // ATN data (required for interpreters)
+            .lexerSerializedATN(lexerATNArray)
+            .parserSerializedATN(parserATNArray)
+            // Rule/channel/mode names (required for interpreters)
+            .lexerRuleNames(lexerRuleNames)
+            .parserRuleNames(parserRuleNames)
+            .channelNames(channelNames)
+            .modeNames(modeNames)
+            .startRuleIndex(0) // "root" rule
+            // Vocabulary (required for token display)
+            .literalNames(literalNames)
+            .symbolicNames(symbolicNames)
+            // NO tokenTypeToCategory - removed for size (derive from conventions)
+            .tokenTypeToCategory(null)
+            .build();
+
+    log.info(
+        "Built optimized PPL artifact: {} tokens, lexer ATN: {} elements, parser ATN: {} elements",
+        symbolicNames.length,
+        lexerATNArray.length,
+        parserATNArray.length);
+    log.info(
+        "ATN transfer size: ~{}KB (JSON int[]), ~{}KB (gzipped)",
+        (lexerATNArray.length + parserATNArray.length) * 4 / 1024,
+        (lexerATNArray.length + parserATNArray.length) * 2 / 1024);
+
+    return artifact;
+  }
+
+}

From ecb07e44849c8637abbfd946fa8b33a330625508 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Thu, 19 Feb 2026 10:38:33 -0800
Subject: [PATCH 02/20] remove unused etag

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../sql/plugin/rest/RestPPLGrammarAction.java | 52 ---------------
 .../plugin/rest/RestPPLGrammarActionTest.java | 66 +------------------
 2 files changed, 3 insertions(+), 115 deletions(-)

diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index c8908cd7228..c34e37ead00 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -34,31 +34,6 @@
  *   <li>Channel and mode names
  * </ul>
  *
- * <p>The bundle is cached in memory and versioned via ETag (grammar hash). Clients should cache
- * locally and only refetch when ETag changes.
- *
- * <p>Example request:
- *
- * <pre>
- * GET /_plugins/_ppl/_grammar
- * If-None-Match: "sha256:abc123..."
- * </pre>
- *
- * <p>Response headers:
- *
- * <pre>
- * HTTP/1.1 200 OK
- * ETag: "sha256:abc123..."
- * Cache-Control: public, max-age=3600
- * Content-Type: application/json
- * </pre>
- *
- * <p>Or if client has latest:
- *
- * <pre>
- * HTTP/1.1 304 Not Modified
- * ETag: "sha256:abc123..."
- * </pre>
  */
 @Log4j2
 public class RestPPLGrammarAction extends BaseRestHandler {
@@ -90,37 +65,10 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
         // Get or build artifact (lazy initialization)
         AutocompleteArtifact artifact = getOrBuildArtifact();
 
-        String grammarHash = artifact.getGrammarHash();
-        String ifNoneMatch = request.header("If-None-Match");
-
-        // Strip quotes from ETag if present ("sha256:..." → sha256:...)
-        if (ifNoneMatch != null && ifNoneMatch.startsWith("\"") && ifNoneMatch.endsWith("\"")) {
-          ifNoneMatch = ifNoneMatch.substring(1, ifNoneMatch.length() - 1);
-        }
-
-        // Return 304 Not Modified if client has latest version
-        if (grammarHash.equals(ifNoneMatch)) {
-          log.debug("Client has latest grammar (ETag match), returning 304");
-          BytesRestResponse response =
-              new BytesRestResponse(RestStatus.NOT_MODIFIED, "application/json", "");
-          response.addHeader("ETag", "\"" + grammarHash + "\"");
-          response.addHeader("Cache-Control", "public, max-age=3600");
-          channel.sendResponse(response);
-          return;
-        }
-
-        // Serialize artifact to JSON
-        log.debug("Serializing grammar to JSON (hash: {})", grammarHash);
         XContentBuilder builder = channel.newBuilder();
         serializeArtifact(builder, artifact);
 
         BytesRestResponse response = new BytesRestResponse(RestStatus.OK, builder);
-
-        // Add caching headers
-        response.addHeader("ETag", "\"" + grammarHash + "\"");
-        response.addHeader("Cache-Control", "public, max-age=3600");
-        response.addHeader("Content-Type", "application/json; charset=UTF-8");
-
         log.info("Returning PPL grammar (size: {} bytes)", response.content().length());
         channel.sendResponse(response);
 
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index cfcd8c9892e..b0f023b1923 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -12,7 +12,6 @@
 
 import java.io.ByteArrayOutputStream;
 import java.nio.charset.StandardCharsets;
-import java.util.Collections;
 import org.junit.Before;
 import org.junit.Test;
 import org.opensearch.core.rest.RestStatus;
@@ -31,7 +30,6 @@
  *
  * <ul>
  *   <li>200 OK response with grammar bundle
- *   <li>304 Not Modified when ETag matches
  *   <li>Grammar structure validation
  *   <li>Cache behavior
  * </ul>
@@ -62,36 +60,19 @@ public void testRoutes() {
 
   @Test
   public void testGetGrammar_ReturnsBundle() throws Exception {
-    // Create request without ETag
     FakeRestRequest request =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
             .withMethod(RestRequest.Method.GET)
             .withPath("/_plugins/_ppl/_grammar")
-            .withHeaders(Collections.emptyMap())
             .build();
 
-    // Create mock channel to capture response
     MockRestChannel channel = new MockRestChannel(request, true);
-
-    // Execute request
     action.prepareRequest(request, client).accept(channel);
 
-    // Verify response
     RestResponse response = channel.getResponse();
     assertNotNull("Response should not be null", response);
     assertEquals("Should return 200 OK", RestStatus.OK, response.status());
 
-    // Verify ETag header present
-    String etag = response.getHeaders().get("ETag").get(0);
-    assertNotNull("ETag header should be present", etag);
-    assertTrue("ETag should start with sha256:", etag.contains("sha256:"));
-
-    // Verify Cache-Control header
-    String cacheControl = response.getHeaders().get("Cache-Control").get(0);
-    assertNotNull("Cache-Control header should be present", cacheControl);
-    assertTrue("Should allow caching", cacheControl.contains("max-age=3600"));
-
-    // Verify response content
     String content = new String(response.content().array(), StandardCharsets.UTF_8);
     assertTrue("Should contain bundleVersion", content.contains("\"bundleVersion\":"));
     assertTrue("Should contain grammarHash", content.contains("\"grammarHash\":"));
@@ -101,39 +82,6 @@ public void testGetGrammar_ReturnsBundle() throws Exception {
     assertTrue("Should contain symbolicNames", content.contains("\"symbolicNames\":"));
   }
 
-  @Test
-  public void testGetGrammar_Returns304WhenETagMatches() throws Exception {
-    // First request to get ETag
-    FakeRestRequest firstRequest =
-        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
-            .withMethod(RestRequest.Method.GET)
-            .withPath("/_plugins/_ppl/_grammar")
-            .build();
-
-    MockRestChannel firstChannel = new MockRestChannel(firstRequest, true);
-    action.prepareRequest(firstRequest, client).accept(firstChannel);
-
-    RestResponse firstResponse = firstChannel.getResponse();
-    String etag = firstResponse.getHeaders().get("ETag").get(0);
-
-    // Second request with matching ETag
-    FakeRestRequest secondRequest =
-        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
-            .withMethod(RestRequest.Method.GET)
-            .withPath("/_plugins/_ppl/_grammar")
-            .withHeaders(Collections.singletonMap("If-None-Match", Collections.singletonList(etag)))
-            .build();
-
-    MockRestChannel secondChannel = new MockRestChannel(secondRequest, true);
-    action.prepareRequest(secondRequest, client).accept(secondChannel);
-
-    RestResponse secondResponse = secondChannel.getResponse();
-    assertEquals(
-        "Should return 304 Not Modified when ETag matches",
-        RestStatus.NOT_MODIFIED,
-        secondResponse.status());
-  }
-
   @Test
   public void testGetGrammar_ArtifactIsCached() throws Exception {
     // Make two requests
@@ -160,11 +108,6 @@ public void testGetGrammar_ArtifactIsCached() throws Exception {
     action.prepareRequest(request2, client).accept(channel2);
     long elapsed2 = System.currentTimeMillis() - startTime2;
 
-    // Both should return same ETag
-    String etag1 = channel1.getResponse().getHeaders().get("ETag").get(0);
-    String etag2 = channel2.getResponse().getHeaders().get("ETag").get(0);
-    assertEquals("ETags should match", etag1, etag2);
-
     // Second request should be faster (artifact cached)
     assertTrue(
         "Second request should be faster due to caching (elapsed1="
@@ -186,12 +129,12 @@ public void testInvalidateCache() throws Exception {
 
     MockRestChannel channel1 = new MockRestChannel(request1, true);
     action.prepareRequest(request1, client).accept(channel1);
-    String etag1 = channel1.getResponse().getHeaders().get("ETag").get(0);
+    assertEquals(RestStatus.OK, channel1.getResponse().status());
 
     // Invalidate cache
     action.invalidateCache();
 
-    // Get grammar again
+    // Get grammar again — should still return 200 OK with same content
     FakeRestRequest request2 =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
             .withMethod(RestRequest.Method.GET)
@@ -200,10 +143,7 @@ public void testInvalidateCache() throws Exception {
 
     MockRestChannel channel2 = new MockRestChannel(request2, true);
     action.prepareRequest(request2, client).accept(channel2);
-    String etag2 = channel2.getResponse().getHeaders().get("ETag").get(0);
-
-    // ETags should still match (same grammar)
-    assertEquals("ETags should match after cache invalidation", etag1, etag2);
+    assertEquals(RestStatus.OK, channel2.getResponse().status());
   }
 
   /** Mock RestChannel to capture responses */

From 380945bdce2a275dd9c12177d8f1bb44285057f2 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Thu, 19 Feb 2026 10:56:20 -0800
Subject: [PATCH 03/20] cleanup on unused code

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../autocomplete/AutocompleteArtifact.java    | 121 -----------
 .../autocomplete/GrammarArtifactBuilder.java  | 202 ------------------
 .../executor/autocomplete/GrammarBundle.java  |  53 +++++
 .../sql/plugin/rest/RestPPLGrammarAction.java |  93 ++++----
 .../plugin/rest/RestPPLGrammarActionTest.java |   2 +-
 .../PPLAutocompleteArtifactBuilder.java       | 122 -----------
 .../autocomplete/PPLGrammarBundleBuilder.java |  98 +++++++++
 7 files changed, 197 insertions(+), 494 deletions(-)
 delete mode 100644 core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java
 delete mode 100644 core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java
 create mode 100644 core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java
 delete mode 100644 ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java
 create mode 100644 ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java

diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java
deleted file mode 100644
index 38b87765cbd..00000000000
--- a/core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright OpenSearch Contributors
- * SPDX-License-Identifier: Apache-2.0
- */
-
-package org.opensearch.sql.executor.autocomplete;
-
-import java.util.Map;
-import lombok.Builder;
-import lombok.Data;
-
-/**
- * Autocomplete artifact bundle containing everything needed for client-side grammar-based
- * autocomplete.
- *
- * <p>This bundle is language-agnostic and can be used for PPL, SQL, or any ANTLR-based language. It
- * contains:
- *
- * <ul>
- *   <li>Serialized ATN data for lexer and parser (for antlr4ng runtime)
- *   <li>Vocabulary and rule names (for token/rule interpretation)
- *   <li>Static catalogs (commands, functions, keywords, snippets)
- *   <li>Token classification mapping (for suggestion categorization)
- * </ul>
- *
- * <p>Frontend uses this bundle to:
- *
- * <ol>
- *   <li>Deserialize ATNs with antlr4ng
- *   <li>Create LexerInterpreter and ParserInterpreter
- *   <li>Use antlr4-c3 to find valid tokens at cursor
- *   <li>Generate suggestions from catalogs
- * </ol>
- */
-@Data
-@Builder
-public class AutocompleteArtifact {
-
-  // ============================================================================
-  // Identity & versioning
-  // ============================================================================
-
-  /** Bundle version (increment when format changes) */
-  private String bundleVersion;
-
-  /**
-   * Hash of grammar sources + ANTLR version. Used for cache validation via ETag. Format:
-   * "sha256:abc123..."
-   */
-  private String grammarHash;
-
-  // ============================================================================
-  // Lexer ATN & metadata
-  // ============================================================================
-
-  /**
-   * Serialized lexer ATN as int array. Frontend uses directly: new
-   * ATNDeserializer().deserialize(lexerSerializedATN)
-   */
-  private int[] lexerSerializedATN;
-
-  /** Lexer rule names (e.g., ["SEARCH", "WHERE", "PIPE", ...]) */
-  private String[] lexerRuleNames;
-
-  /** Channel names (e.g., ["DEFAULT_TOKEN_CHANNEL", "WHITESPACE", "ERRORCHANNEL"]) */
-  private String[] channelNames;
-
-  /** Mode names (e.g., ["DEFAULT_MODE"]) */
-  private String[] modeNames;
-
-  // ============================================================================
-  // Parser ATN & metadata
-  // ============================================================================
-
-  /**
-   * Serialized parser ATN as int array. Frontend uses directly: new
-   * ATNDeserializer().deserialize(parserSerializedATN)
-   */
-  private int[] parserSerializedATN;
-
-  /** Parser rule names (e.g., ["root", "pplStatement", "commands", ...]) */
-  private String[] parserRuleNames;
-
-  /** Start rule index (usually 0 for "root" rule) */
-  private int startRuleIndex;
-
-  // ============================================================================
-  // Vocabulary
-  // ============================================================================
-
-  /**
-   * Literal names from vocabulary. Index = token type. Values are literal tokens with quotes, or
-   * null. Example: ["<INVALID>", "'search'", "'where'", "'|'", null, null, ...]
-   */
-  private String[] literalNames;
-
-  /**
-   * Symbolic names from vocabulary. Index = token type. Values are token symbolic names, or null.
-   * Example: ["<INVALID>", "SEARCH", "WHERE", "PIPE", "ID", "INTEGER", ...]
-   */
-  private String[] symbolicNames;
-
-  /**
-   * Optional display names (user-friendly token names). If not provided, frontend uses literal or
-   * symbolic names.
-   */
-  private String[] displayNames;
-
-  // ============================================================================
-  // Token classification
-  // ============================================================================
-
-  /**
-   * Mapping from token symbolic name to suggestion category. Used by frontend to classify antlr4-c3
-   * token candidates into suggestion types.
-   *
-   * <p>Example: { "SEARCH": "COMMAND", "WHERE": "COMMAND", "BY": "KEYWORD", "COUNT": "FUNCTION",
-   * "AND": "OPERATOR" }
-   */
-  private Map<String, String> tokenTypeToCategory;
-}
diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java
deleted file mode 100644
index 5bbffea1e9e..00000000000
--- a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright OpenSearch Contributors
- * SPDX-License-Identifier: Apache-2.0
- */
-
-package org.opensearch.sql.executor.autocomplete;
-
-import java.nio.charset.StandardCharsets;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import lombok.extern.log4j.Log4j2;
-import org.antlr.v4.runtime.Lexer;
-import org.antlr.v4.runtime.Parser;
-import org.antlr.v4.runtime.Vocabulary;
-
-/**
- * Utility class for extracting ANTLR grammar artifacts (ATN, vocabulary, rule names) from generated
- * parser/lexer classes.
- *
- * <p>This class handles the low-level details of:
- *
- * <ul>
- *   <li>Converting ANTLR's Java String ATN format to int[] for JSON transfer
- *   <li>Extracting vocabulary (literal and symbolic names)
- *   <li>Extracting rule names via public ANTLR APIs
- *   <li>Computing grammar hash for versioning
- * </ul>
- *
- * <p>Language-specific builders (PPL, SQL) use this class to build their autocomplete bundles.
- */
-@Log4j2
-public class GrammarArtifactBuilder {
-
-  /**
-   * Extract literal names from vocabulary.
-   *
-   * <p>Returns array where index = token type, value = literal token (with quotes) or null.
-   *
-   * <p>Example: ["<INVALID>", "'search'", "'where'", "'|'", null, ...]
-   *
-   * @param vocabulary Parser vocabulary
-   * @return Array of literal names
-   */
-  public static String[] extractLiteralNames(Vocabulary vocabulary) {
-    int maxTokenType = vocabulary.getMaxTokenType();
-    String[] names = new String[maxTokenType + 1];
-
-    for (int i = 0; i <= maxTokenType; i++) {
-      String literal = vocabulary.getLiteralName(i);
-      // Keep nulls as nulls (no literal representation)
-      names[i] = literal;
-    }
-
-    log.debug("Extracted {} literal names", names.length);
-    return names;
-  }
-
-  /**
-   * Extract symbolic names from vocabulary.
-   *
-   * <p>Returns array where index = token type, value = symbolic token name or null.
-   *
-   * <p>Example: ["<INVALID>", "SEARCH", "WHERE", "PIPE", "ID", ...]
-   *
-   * @param vocabulary Parser vocabulary
-   * @return Array of symbolic names
-   */
-  public static String[] extractSymbolicNames(Vocabulary vocabulary) {
-    int maxTokenType = vocabulary.getMaxTokenType();
-    String[] names = new String[maxTokenType + 1];
-
-    for (int i = 0; i <= maxTokenType; i++) {
-      String symbolic = vocabulary.getSymbolicName(i);
-      // Keep nulls as nulls (no symbolic name)
-      names[i] = symbolic;
-    }
-
-    log.debug("Extracted {} symbolic names", names.length);
-    return names;
-  }
-
-  /**
-   * Extract rule names from parser.
-   *
-   * <p>Parser.getRuleNames() is public API.
-   *
-   * @param parser Parser instance
-   * @return Array of rule names
-   */
-  public static String[] extractParserRuleNames(Parser parser) {
-    String[] ruleNames = parser.getRuleNames();
-    log.debug("Extracted {} parser rule names", ruleNames.length);
-    return ruleNames;
-  }
-
-  /**
-   * Extract rule names from lexer.
-   *
-   * <p>Lexer.getRuleNames() is public API (Lexer extends Recognizer).
-   *
-   * @param lexer Lexer instance
-   * @return Array of lexer rule names
-   */
-  public static String[] extractLexerRuleNames(Lexer lexer) {
-    String[] ruleNames = lexer.getRuleNames();
-    log.debug("Extracted {} lexer rule names", ruleNames.length);
-    return ruleNames;
-  }
-
-  /**
-   * Extract channel names from lexer.
-   *
-   * <p>ANTLR 4.x exposes channel names via getChannelNames() method in generated lexers.
-   * This method dynamically extracts the actual channel names from the lexer instance.
-   *
-   * @param lexer Lexer instance
-   * @return Array of channel names
-   */
-  public static String[] extractChannelNames(Lexer lexer) {
-    String[] channelNames = lexer.getChannelNames();
-    log.debug("Extracted {} channel names from lexer", channelNames.length);
-    return channelNames;
-  }
-
-  /**
-   * Extract mode names from lexer.
-   *
-   * <p>ANTLR 4.x exposes mode names via getModeNames() method in generated lexers.
-   * This method dynamically extracts the actual mode names from the lexer instance.
-   *
-   * @param lexer Lexer instance
-   * @return Array of mode names
-   */
-  public static String[] extractModeNames(Lexer lexer) {
-    String[] modeNames = lexer.getModeNames();
-    log.debug("Extracted {} mode names from lexer", modeNames.length);
-    return modeNames;
-  }
-
-  /**
-   * Compute grammar hash from ATN data (recommended).
-   *
-   * <p>This method hashes the serialized ATN arrays directly, which:
-   *
-   * <ul>
-   *   <li>Always available at runtime (no classpath dependencies)
-   *   <li>Reflects the actual artifact being served
-   *   <li>Changes when grammar changes (ATN structure changes)
-   * </ul>
-   *
-   * @param lexerATN Serialized lexer ATN as int array
-   * @param parserATN Serialized parser ATN as int array
-   * @param antlrVersion ANTLR tool version (e.g., "4.13.2")
-   * @return Hash string in format "sha256:abc123..."
-   */
-  public static String computeGrammarHash(int[] lexerATN, int[] parserATN, String antlrVersion) {
-    try {
-      MessageDigest digest = MessageDigest.getInstance("SHA-256");
-
-      // Hash lexer ATN data
-      for (int value : lexerATN) {
-        digest.update((byte) (value >> 8));
-        digest.update((byte) value);
-      }
-
-      // Hash parser ATN data
-      for (int value : parserATN) {
-        digest.update((byte) (value >> 8));
-        digest.update((byte) value);
-      }
-
-      // Hash ANTLR version to detect generator changes
-      digest.update(antlrVersion.getBytes(StandardCharsets.UTF_8));
-
-      // Compute hash
-      byte[] hashBytes = digest.digest();
-      String result = "sha256:" + bytesToHex(hashBytes);
-
-      log.info("Computed grammar hash from ATN data: {}", result);
-      return result;
-
-    } catch (NoSuchAlgorithmException e) {
-      // SHA-256 is required by Java specification, this should never happen
-      throw new IllegalStateException("SHA-256 algorithm not available", e);
-    }
-  }
-
-
-  /**
-   * Convert byte array to hex string.
-   *
-   * @param bytes Input bytes
-   * @return Hex string (lowercase)
-   */
-  private static String bytesToHex(byte[] bytes) {
-    StringBuilder sb = new StringBuilder(bytes.length * 2);
-    for (byte b : bytes) {
-      sb.append(String.format("%02x", b & 0xFF));
-    }
-    return sb.toString();
-  }
-}
diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java
new file mode 100644
index 00000000000..6e4a456a22e
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.executor.autocomplete;
+
+import lombok.Builder;
+import lombok.Data;
+
+/**
+ * Grammar bundle containing everything needed for client-side ANTLR-based parsing.
+ *
+ * <p>Includes serialized ATN data (lexer + parser), vocabulary, and rule/channel/mode names.
+ * Language-agnostic — usable for PPL, SQL, or any ANTLR4 grammar.
+ */
+@Data
+@Builder
+public class GrammarBundle {
+
+  /** Bundle format version. Increment when the schema changes. */
+  private String bundleVersion;
+
+  /** SHA-256 hash of the serialized ATN data. Clients may use this to detect grammar changes. */
+  private String grammarHash;
+
+  /** Serialized lexer ATN as int array (ATNSerializer output). */
+  private int[] lexerSerializedATN;
+
+  /** Lexer rule names. */
+  private String[] lexerRuleNames;
+
+  /** Channel names (e.g. DEFAULT_TOKEN_CHANNEL, HIDDEN). */
+  private String[] channelNames;
+
+  /** Mode names (e.g. DEFAULT_MODE). */
+  private String[] modeNames;
+
+  /** Serialized parser ATN as int array (ATNSerializer output). */
+  private int[] parserSerializedATN;
+
+  /** Parser rule names. */
+  private String[] parserRuleNames;
+
+  /** Start rule index (0 = root rule). */
+  private int startRuleIndex;
+
+  /** Literal token names indexed by token type (e.g. "'search'", "'|'"). */
+  private String[] literalNames;
+
+  /** Symbolic token names indexed by token type (e.g. "SEARCH", "PIPE"). */
+  private String[] symbolicNames;
+}
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index c34e37ead00..0673dd7d467 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -16,8 +16,8 @@
 import org.opensearch.rest.BaseRestHandler;
 import org.opensearch.rest.BytesRestResponse;
 import org.opensearch.rest.RestRequest;
-import org.opensearch.sql.executor.autocomplete.AutocompleteArtifact;
-import org.opensearch.sql.ppl.autocomplete.PPLAutocompleteArtifactBuilder;
+import org.opensearch.sql.executor.autocomplete.GrammarBundle;
+import org.opensearch.sql.ppl.autocomplete.PPLGrammarBundleBuilder;
 import org.opensearch.transport.client.node.NodeClient;
 
 /**
@@ -40,9 +40,9 @@ public class RestPPLGrammarAction extends BaseRestHandler {
 
   private static final String ENDPOINT_PATH = "/_plugins/_ppl/_grammar";
 
-  // Lazy-initialized singleton artifact (built once per JVM lifecycle)
-  private volatile AutocompleteArtifact cachedArtifact;
-  private final Object artifactLock = new Object();
+  // Lazy-initialized singleton bundle (built once per JVM lifecycle)
+  private volatile GrammarBundle cachedBundle;
+  private final Object bundleLock = new Object();
 
   @Override
   public String getName() {
@@ -62,11 +62,10 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
 
     return channel -> {
       try {
-        // Get or build artifact (lazy initialization)
-        AutocompleteArtifact artifact = getOrBuildArtifact();
+        GrammarBundle bundle = getOrBuildBundle();
 
         XContentBuilder builder = channel.newBuilder();
-        serializeArtifact(builder, artifact);
+        serializeBundle(builder, bundle);
 
         BytesRestResponse response = new BytesRestResponse(RestStatus.OK, builder);
         log.info("Returning PPL grammar (size: {} bytes)", response.content().length());
@@ -80,95 +79,93 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
   }
 
   /**
-   * Get cached artifact or build it if not initialized.
+   * Get cached bundle or build it if not yet initialized.
    *
    * <p>Thread-safe lazy initialization with double-checked locking.
    */
-  private AutocompleteArtifact getOrBuildArtifact() {
+  private GrammarBundle getOrBuildBundle() {
     // First check without lock (common case: already initialized)
-    if (cachedArtifact != null) {
-      return cachedArtifact;
+    if (cachedBundle != null) {
+      return cachedBundle;
     }
 
     // Acquire lock for initialization
-    synchronized (artifactLock) {
+    synchronized (bundleLock) {
       // Double-check after acquiring lock
-      if (cachedArtifact == null) {
-        log.info("Building PPL grammar artifact (first request)...");
+      if (cachedBundle == null) {
+        log.info("Building PPL grammar bundle (first request)...");
         long startTime = System.currentTimeMillis();
 
-        PPLAutocompleteArtifactBuilder builder = new PPLAutocompleteArtifactBuilder();
-        cachedArtifact = builder.buildArtifact();
+        PPLGrammarBundleBuilder builder = new PPLGrammarBundleBuilder();
+        cachedBundle = builder.build();
 
         long elapsed = System.currentTimeMillis() - startTime;
-        log.info("Built PPL grammar in {}ms (hash: {})", elapsed, cachedArtifact.getGrammarHash());
+        log.info("Built PPL grammar in {}ms (hash: {})", elapsed, cachedBundle.getGrammarHash());
       }
-      return cachedArtifact;
+      return cachedBundle;
     }
   }
 
   /**
-   * Invalidate cached artifact (for testing or grammar updates).
+   * Invalidate cached bundle (for testing or grammar updates).
    *
    * <p>Note: This only affects the current node. In a multi-node cluster, each node maintains its
    * own cache.
    */
   public void invalidateCache() {
-    synchronized (artifactLock) {
-      log.info("Invalidating cached PPL grammar");
-      cachedArtifact = null;
+    synchronized (bundleLock) {
+      log.info("Invalidating cached PPL grammar bundle");
+      cachedBundle = null;
     }
   }
 
   /**
-   * Manually serialize AutocompleteArtifact to JSON.
-   *
-   * <p>Outputs ATN data and arrays as standard JSON arrays.
+   * Serialize {@link GrammarBundle} to JSON.
    */
-  private void serializeArtifact(XContentBuilder builder, AutocompleteArtifact artifact)
+  private void serializeBundle(XContentBuilder builder, GrammarBundle bundle)
       throws IOException {
     builder.startObject();
 
     // Identity & versioning
-    builder.field("bundleVersion", artifact.getBundleVersion());
-    builder.field("grammarHash", artifact.getGrammarHash());
-    builder.field("startRuleIndex", artifact.getStartRuleIndex());
+    builder.field("bundleVersion", bundle.getBundleVersion());
+    builder.field("grammarHash", bundle.getGrammarHash());
+    builder.field("startRuleIndex", bundle.getStartRuleIndex());
 
     // Lexer ATN & metadata
-    if (artifact.getLexerSerializedATN() != null) {
-      builder.field("lexerSerializedATN", artifact.getLexerSerializedATN());
-      log.debug("Lexer ATN: {} elements", artifact.getLexerSerializedATN().length);
+    if (bundle.getLexerSerializedATN() != null) {
+      builder.field("lexerSerializedATN", bundle.getLexerSerializedATN());
+      log.debug("Lexer ATN: {} elements", bundle.getLexerSerializedATN().length);
     }
 
-    if (artifact.getLexerRuleNames() != null) {
-      builder.field("lexerRuleNames", artifact.getLexerRuleNames());
+    if (bundle.getLexerRuleNames() != null) {
+      builder.field("lexerRuleNames", bundle.getLexerRuleNames());
     }
 
-    if (artifact.getChannelNames() != null) {
-      builder.field("channelNames", artifact.getChannelNames());
+    if (bundle.getChannelNames() != null) {
+      builder.field("channelNames", bundle.getChannelNames());
     }
 
-    if (artifact.getModeNames() != null) {
-      builder.field("modeNames", artifact.getModeNames());
+    if (bundle.getModeNames() != null) {
+      builder.field("modeNames", bundle.getModeNames());
     }
 
     // Parser ATN & metadata
-    if (artifact.getParserSerializedATN() != null) {
-      builder.field("parserSerializedATN", artifact.getParserSerializedATN());
-      log.debug("Parser ATN: {} elements", artifact.getParserSerializedATN().length);
+    if (bundle.getParserSerializedATN() != null) {
+      builder.field("parserSerializedATN", bundle.getParserSerializedATN());
+      log.debug("Parser ATN: {} elements", bundle.getParserSerializedATN().length);
     }
 
-    if (artifact.getParserRuleNames() != null) {
-      builder.field("parserRuleNames", artifact.getParserRuleNames());
+    if (bundle.getParserRuleNames() != null) {
+      builder.field("parserRuleNames", bundle.getParserRuleNames());
     }
 
     // Vocabulary
-    if (artifact.getLiteralNames() != null) {
-      builder.field("literalNames", artifact.getLiteralNames());
+    if (bundle.getLiteralNames() != null) {
+      builder.field("literalNames", bundle.getLiteralNames());
     }
 
-    if (artifact.getSymbolicNames() != null) {
-      builder.field("symbolicNames", artifact.getSymbolicNames());
+    if (bundle.getSymbolicNames() != null) {
+      builder.field("symbolicNames", bundle.getSymbolicNames());
     }
 
     builder.endObject();
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index b0f023b1923..6f7ed82dc75 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -108,7 +108,7 @@ public void testGetGrammar_ArtifactIsCached() throws Exception {
     action.prepareRequest(request2, client).accept(channel2);
     long elapsed2 = System.currentTimeMillis() - startTime2;
 
-    // Second request should be faster (artifact cached)
+    // Second request should be faster (bundle cached)
     assertTrue(
         "Second request should be faster due to caching (elapsed1="
             + elapsed1
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java
deleted file mode 100644
index d6e11b40848..00000000000
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLAutocompleteArtifactBuilder.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright OpenSearch Contributors
- * SPDX-License-Identifier: Apache-2.0
- */
-
-package org.opensearch.sql.ppl.autocomplete;
-
-import lombok.extern.log4j.Log4j2;
-import org.antlr.v4.runtime.CharStreams;
-import org.antlr.v4.runtime.CommonTokenStream;
-import org.antlr.v4.runtime.Vocabulary;
-import org.antlr.v4.runtime.atn.ATNSerializer;
-import org.opensearch.sql.executor.autocomplete.AutocompleteArtifact;
-import org.opensearch.sql.executor.autocomplete.GrammarArtifactBuilder;
-import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer;
-import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser;
-
-/**
- * Builds autocomplete artifact bundle for PPL language.
- *
- * <p>This class:
- *
- * <ul>
- *   <li>Extracts ATN and vocabulary from generated PPL parser/lexer
- *   <li>Builds PPL-specific catalogs (commands, functions, keywords)
- *   <li>Creates token-to-category mapping for suggestion classification
- *   <li>Computes grammar hash for versioning
- * </ul>
- *
- * <p>The resulting bundle is served via REST API and cached on frontend for client-side
- * autocomplete.
- */
-@Log4j2
-public class PPLAutocompleteArtifactBuilder {
-
-  private static final String ANTLR_VERSION = "4.13.2";
-  private static final String BUNDLE_VERSION = "1.0";
-  private static final String RUNTIME_TARGET = "antlr4ng@3.x";
-
-  /**
-   * Build complete autocomplete artifact for PPL using optimized encoding.
-   *
-   * <p>This version uses:
-   *
-   * <ul>
-   *   <li>Base64-encoded ATN (5-15x smaller than JSON int arrays)
-   *   <li>Newline-packed string arrays (smaller than JSON arrays)
-   *   <li>NO tokenTypeToCategory map (derive from naming conventions)
-   * </ul>
-   *
-   * @return AutocompleteArtifact ready for JSON serialization
-   */
-  public AutocompleteArtifact buildArtifact() {
-    log.info("Building optimized PPL autocomplete artifact...");
-
-    // Create parser and lexer instances (with dummy input)
-    OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString(""));
-    CommonTokenStream tokens = new CommonTokenStream(lexer);
-    OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens);
-
-    // Do NOT use lexer.getSerializedATN().chars().toArray() — that gives
-    // the raw UTF-16 char encoding which has offset values and causes
-    // "state type 65535 is not valid" errors in the frontend deserializer.
-    int[] lexerATNArray = new ATNSerializer(lexer.getATN()).serialize().toArray();
-    int[] parserATNArray = new ATNSerializer(parser.getATN()).serialize().toArray();
-
-    log.info("ATN data ready:");
-    log.info("  Lexer ATN: {} elements", lexerATNArray.length);
-    log.info("  Parser ATN: {} elements", parserATNArray.length);
-
-    // Extract vocabulary
-    Vocabulary vocabulary = parser.getVocabulary();
-    String[] literalNames = GrammarArtifactBuilder.extractLiteralNames(vocabulary);
-    String[] symbolicNames = GrammarArtifactBuilder.extractSymbolicNames(vocabulary);
-
-    // Extract rule names
-    String[] parserRuleNames = GrammarArtifactBuilder.extractParserRuleNames(parser);
-    String[] lexerRuleNames = GrammarArtifactBuilder.extractLexerRuleNames(lexer);
-
-    // Extract channel/mode names from lexer
-    String[] channelNames = GrammarArtifactBuilder.extractChannelNames(lexer);
-    String[] modeNames = GrammarArtifactBuilder.extractModeNames(lexer);
-
-    // Compute grammar hash from ATN data (always available, no classpath dependency)
-    String grammarHash =
-        GrammarArtifactBuilder.computeGrammarHash(lexerATNArray, parserATNArray, ANTLR_VERSION);
-
-    // Build minimal artifact with only essential fields
-    AutocompleteArtifact artifact =
-        AutocompleteArtifact.builder()
-            .bundleVersion(BUNDLE_VERSION)
-            .grammarHash(grammarHash)
-            // ATN data (required for interpreters)
-            .lexerSerializedATN(lexerATNArray)
-            .parserSerializedATN(parserATNArray)
-            // Rule/channel/mode names (required for interpreters)
-            .lexerRuleNames(lexerRuleNames)
-            .parserRuleNames(parserRuleNames)
-            .channelNames(channelNames)
-            .modeNames(modeNames)
-            .startRuleIndex(0) // "root" rule
-            // Vocabulary (required for token display)
-            .literalNames(literalNames)
-            .symbolicNames(symbolicNames)
-            // NO tokenTypeToCategory - removed for size (derive from conventions)
-            .tokenTypeToCategory(null)
-            .build();
-
-    log.info(
-        "Built optimized PPL artifact: {} tokens, lexer ATN: {} elements, parser ATN: {} elements",
-        symbolicNames.length,
-        lexerATNArray.length,
-        parserATNArray.length);
-    log.info(
-        "ATN transfer size: ~{}KB (JSON int[]), ~{}KB (gzipped)",
-        (lexerATNArray.length + parserATNArray.length) * 4 / 1024,
-        (lexerATNArray.length + parserATNArray.length) * 2 / 1024);
-
-    return artifact;
-  }
-
-}
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
new file mode 100644
index 00000000000..ce73f0d3b5f
--- /dev/null
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.ppl.autocomplete;
+
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import lombok.extern.log4j.Log4j2;
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.Vocabulary;
+import org.antlr.v4.runtime.atn.ATNSerializer;
+import org.opensearch.sql.executor.autocomplete.GrammarBundle;
+import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer;
+import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser;
+
+/** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */
+@Log4j2
+public class PPLGrammarBundleBuilder {
+
+  private static final String ANTLR_VERSION = "4.13.2";
+  private static final String BUNDLE_VERSION = "1.0";
+
+  /**
+   * Build the PPL grammar bundle.
+   *
+   * <p>Uses {@link ATNSerializer} to re-serialize the ATN into the int[] format expected by the
+   * antlr4ng runtime on the frontend. Do NOT use {@code lexer.getSerializedATN().chars().toArray()}
+   * — that yields raw UTF-16 char values which cause "state type 65535 is not valid" errors.
+   *
+   * @return {@link GrammarBundle} ready for JSON serialization
+   */
+  public GrammarBundle build() {
+    log.info("Building PPL grammar bundle...");
+
+    OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString(""));
+    CommonTokenStream tokens = new CommonTokenStream(lexer);
+    OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens);
+
+    int[] lexerATN = new ATNSerializer(lexer.getATN()).serialize().toArray();
+    int[] parserATN = new ATNSerializer(parser.getATN()).serialize().toArray();
+
+    log.info("Lexer ATN: {} elements, Parser ATN: {} elements", lexerATN.length, parserATN.length);
+
+    Vocabulary vocabulary = parser.getVocabulary();
+    int maxTokenType = vocabulary.getMaxTokenType();
+    String[] literalNames = new String[maxTokenType + 1];
+    String[] symbolicNames = new String[maxTokenType + 1];
+    for (int i = 0; i <= maxTokenType; i++) {
+      literalNames[i] = vocabulary.getLiteralName(i);
+      symbolicNames[i] = vocabulary.getSymbolicName(i);
+    }
+
+    GrammarBundle bundle =
+        GrammarBundle.builder()
+            .bundleVersion(BUNDLE_VERSION)
+            .grammarHash(computeGrammarHash(lexerATN, parserATN))
+            .lexerSerializedATN(lexerATN)
+            .parserSerializedATN(parserATN)
+            .lexerRuleNames(lexer.getRuleNames())
+            .parserRuleNames(parser.getRuleNames())
+            .channelNames(lexer.getChannelNames())
+            .modeNames(lexer.getModeNames())
+            .startRuleIndex(0)
+            .literalNames(literalNames)
+            .symbolicNames(symbolicNames)
+            .build();
+
+    log.info("Built PPL grammar bundle: {} tokens", symbolicNames.length);
+    return bundle;
+  }
+
+  private static String computeGrammarHash(int[] lexerATN, int[] parserATN) {
+    try {
+      MessageDigest digest = MessageDigest.getInstance("SHA-256");
+      for (int v : lexerATN) {
+        digest.update((byte) (v >> 8));
+        digest.update((byte) v);
+      }
+      for (int v : parserATN) {
+        digest.update((byte) (v >> 8));
+        digest.update((byte) v);
+      }
+      digest.update(ANTLR_VERSION.getBytes(StandardCharsets.UTF_8));
+      byte[] hash = digest.digest();
+      StringBuilder sb = new StringBuilder("sha256:");
+      for (byte b : hash) {
+        sb.append(String.format("%02x", b & 0xFF));
+      }
+      return sb.toString();
+    } catch (NoSuchAlgorithmException e) {
+      throw new IllegalStateException("SHA-256 not available", e);
+    }
+  }
+}

From b9f351351966a06cb03215e349a3b324a760506e Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Thu, 19 Feb 2026 11:05:30 -0800
Subject: [PATCH 04/20] cleanup on comments and debug logging

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../executor/autocomplete/GrammarBundle.java  |  9 +---
 .../sql/plugin/rest/RestPPLGrammarAction.java | 45 ++---------------
 .../plugin/rest/RestPPLGrammarActionTest.java | 20 +-------
 .../autocomplete/PPLGrammarBundleBuilder.java | 48 +++++++------------
 4 files changed, 25 insertions(+), 97 deletions(-)

diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java
index 6e4a456a22e..7b3720881aa 100644
--- a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java
+++ b/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java
@@ -8,17 +8,12 @@
 import lombok.Builder;
 import lombok.Data;
 
-/**
- * Grammar bundle containing everything needed for client-side ANTLR-based parsing.
- *
- * <p>Includes serialized ATN data (lexer + parser), vocabulary, and rule/channel/mode names.
- * Language-agnostic — usable for PPL, SQL, or any ANTLR4 grammar.
- */
+/** Response payload for the {@code GET /_plugins/_ppl/_grammar} endpoint. */
 @Data
 @Builder
 public class GrammarBundle {
 
-  /** Bundle format version. Increment when the schema changes. */
+  /** Bundle format version. */
   private String bundleVersion;
 
   /** SHA-256 hash of the serialized ATN data. Clients may use this to detect grammar changes. */
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index 0673dd7d467..5cc8c72a631 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -20,21 +20,7 @@
 import org.opensearch.sql.ppl.autocomplete.PPLGrammarBundleBuilder;
 import org.opensearch.transport.client.node.NodeClient;
 
-/**
- * REST handler for PPL grammar metadata endpoint.
- *
- * <p>Endpoint: GET /_plugins/_ppl/_grammar
- *
- * <p>Returns grammar data for client-side parsing, autocomplete, syntax highlighting, etc:
- *
- * <ul>
- *   <li>Serialized ANTLR ATN data (lexer + parser)
- *   <li>Vocabulary (literal and symbolic names)
- *   <li>Rule names (parser and lexer)
- *   <li>Channel and mode names
- * </ul>
- *
- */
+/** REST handler for {@code GET /_plugins/_ppl/_grammar}. */
 @Log4j2
 public class RestPPLGrammarAction extends BaseRestHandler {
 
@@ -58,8 +44,6 @@ public List<Route> routes() {
   protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client)
       throws IOException {
 
-    log.debug("Received PPL grammar request");
-
     return channel -> {
       try {
         GrammarBundle bundle = getOrBuildBundle();
@@ -67,9 +51,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
         XContentBuilder builder = channel.newBuilder();
         serializeBundle(builder, bundle);
 
-        BytesRestResponse response = new BytesRestResponse(RestStatus.OK, builder);
-        log.info("Returning PPL grammar (size: {} bytes)", response.content().length());
-        channel.sendResponse(response);
+        channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder));
 
       } catch (Exception e) {
         log.error("Error building or serializing PPL grammar", e);
@@ -78,20 +60,13 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
     };
   }
 
-  /**
-   * Get cached bundle or build it if not yet initialized.
-   *
-   * <p>Thread-safe lazy initialization with double-checked locking.
-   */
+  // Thread-safe lazy initialization with double-checked locking.
   private GrammarBundle getOrBuildBundle() {
-    // First check without lock (common case: already initialized)
     if (cachedBundle != null) {
       return cachedBundle;
     }
-
-    // Acquire lock for initialization
     synchronized (bundleLock) {
-      // Double-check after acquiring lock
+      // double-check after acquiring lock
       if (cachedBundle == null) {
         log.info("Building PPL grammar bundle (first request)...");
         long startTime = System.currentTimeMillis();
@@ -106,12 +81,7 @@ private GrammarBundle getOrBuildBundle() {
     }
   }
 
-  /**
-   * Invalidate cached bundle (for testing or grammar updates).
-   *
-   * <p>Note: This only affects the current node. In a multi-node cluster, each node maintains its
-   * own cache.
-   */
+  /** Invalidate the cached bundle, forcing a rebuild on the next request. */
   public void invalidateCache() {
     synchronized (bundleLock) {
       log.info("Invalidating cached PPL grammar bundle");
@@ -119,9 +89,6 @@ public void invalidateCache() {
     }
   }
 
-  /**
-   * Serialize {@link GrammarBundle} to JSON.
-   */
   private void serializeBundle(XContentBuilder builder, GrammarBundle bundle)
       throws IOException {
     builder.startObject();
@@ -134,7 +101,6 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle)
     // Lexer ATN & metadata
     if (bundle.getLexerSerializedATN() != null) {
       builder.field("lexerSerializedATN", bundle.getLexerSerializedATN());
-      log.debug("Lexer ATN: {} elements", bundle.getLexerSerializedATN().length);
     }
 
     if (bundle.getLexerRuleNames() != null) {
@@ -152,7 +118,6 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle)
     // Parser ATN & metadata
     if (bundle.getParserSerializedATN() != null) {
       builder.field("parserSerializedATN", bundle.getParserSerializedATN());
-      log.debug("Parser ATN: {} elements", bundle.getParserSerializedATN().length);
     }
 
     if (bundle.getParserRuleNames() != null) {
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index 6f7ed82dc75..031b32ddb74 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -23,17 +23,7 @@
 import org.opensearch.test.rest.FakeRestRequest;
 import org.opensearch.transport.client.node.NodeClient;
 
-/**
- * Unit tests for RestPPLGrammarAction.
- *
- * <p>Tests:
- *
- * <ul>
- *   <li>200 OK response with grammar bundle
- *   <li>Grammar structure validation
- *   <li>Cache behavior
- * </ul>
- */
+/** Unit tests for {@link RestPPLGrammarAction}. */
 public class RestPPLGrammarActionTest extends OpenSearchTestCase {
 
   private RestPPLGrammarAction action;
@@ -83,8 +73,7 @@ public void testGetGrammar_ReturnsBundle() throws Exception {
   }
 
   @Test
-  public void testGetGrammar_ArtifactIsCached() throws Exception {
-    // Make two requests
+  public void testGetGrammar_BundleIsCached() throws Exception {
     FakeRestRequest request1 =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
             .withMethod(RestRequest.Method.GET)
@@ -96,7 +85,6 @@ public void testGetGrammar_ArtifactIsCached() throws Exception {
     action.prepareRequest(request1, client).accept(channel1);
     long elapsed1 = System.currentTimeMillis() - startTime1;
 
-    // Second request should be faster (cached)
     FakeRestRequest request2 =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
             .withMethod(RestRequest.Method.GET)
@@ -108,7 +96,6 @@ public void testGetGrammar_ArtifactIsCached() throws Exception {
     action.prepareRequest(request2, client).accept(channel2);
     long elapsed2 = System.currentTimeMillis() - startTime2;
 
-    // Second request should be faster (bundle cached)
     assertTrue(
         "Second request should be faster due to caching (elapsed1="
             + elapsed1
@@ -120,7 +107,6 @@ public void testGetGrammar_ArtifactIsCached() throws Exception {
 
   @Test
   public void testInvalidateCache() throws Exception {
-    // Get grammar
     FakeRestRequest request1 =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
             .withMethod(RestRequest.Method.GET)
@@ -131,10 +117,8 @@ public void testInvalidateCache() throws Exception {
     action.prepareRequest(request1, client).accept(channel1);
     assertEquals(RestStatus.OK, channel1.getResponse().status());
 
-    // Invalidate cache
     action.invalidateCache();
 
-    // Get grammar again — should still return 200 OK with same content
     FakeRestRequest request2 =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
             .withMethod(RestRequest.Method.GET)
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index ce73f0d3b5f..a5ee5bd4e43 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -8,7 +8,6 @@
 import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
-import lombok.extern.log4j.Log4j2;
 import org.antlr.v4.runtime.CharStreams;
 import org.antlr.v4.runtime.CommonTokenStream;
 import org.antlr.v4.runtime.Vocabulary;
@@ -18,33 +17,22 @@
 import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser;
 
 /** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */
-@Log4j2
 public class PPLGrammarBundleBuilder {
 
   private static final String ANTLR_VERSION = "4.13.2";
   private static final String BUNDLE_VERSION = "1.0";
 
-  /**
-   * Build the PPL grammar bundle.
-   *
-   * <p>Uses {@link ATNSerializer} to re-serialize the ATN into the int[] format expected by the
-   * antlr4ng runtime on the frontend. Do NOT use {@code lexer.getSerializedATN().chars().toArray()}
-   * — that yields raw UTF-16 char values which cause "state type 65535 is not valid" errors.
-   *
-   * @return {@link GrammarBundle} ready for JSON serialization
-   */
   public GrammarBundle build() {
-    log.info("Building PPL grammar bundle...");
-
     OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString(""));
     CommonTokenStream tokens = new CommonTokenStream(lexer);
     OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens);
 
+    // ATNSerializer re-serializes the ATN into the int[] format expected by antlr4ng.
+    // Do not use lexer.getSerializedATN().chars().toArray() — that yields raw UTF-16 char values
+    // which cause "state type 65535 is not valid" errors in the frontend deserializer.
     int[] lexerATN = new ATNSerializer(lexer.getATN()).serialize().toArray();
     int[] parserATN = new ATNSerializer(parser.getATN()).serialize().toArray();
 
-    log.info("Lexer ATN: {} elements, Parser ATN: {} elements", lexerATN.length, parserATN.length);
-
     Vocabulary vocabulary = parser.getVocabulary();
     int maxTokenType = vocabulary.getMaxTokenType();
     String[] literalNames = new String[maxTokenType + 1];
@@ -54,23 +42,19 @@ public GrammarBundle build() {
       symbolicNames[i] = vocabulary.getSymbolicName(i);
     }
 
-    GrammarBundle bundle =
-        GrammarBundle.builder()
-            .bundleVersion(BUNDLE_VERSION)
-            .grammarHash(computeGrammarHash(lexerATN, parserATN))
-            .lexerSerializedATN(lexerATN)
-            .parserSerializedATN(parserATN)
-            .lexerRuleNames(lexer.getRuleNames())
-            .parserRuleNames(parser.getRuleNames())
-            .channelNames(lexer.getChannelNames())
-            .modeNames(lexer.getModeNames())
-            .startRuleIndex(0)
-            .literalNames(literalNames)
-            .symbolicNames(symbolicNames)
-            .build();
-
-    log.info("Built PPL grammar bundle: {} tokens", symbolicNames.length);
-    return bundle;
+    return GrammarBundle.builder()
+        .bundleVersion(BUNDLE_VERSION)
+        .grammarHash(computeGrammarHash(lexerATN, parserATN))
+        .lexerSerializedATN(lexerATN)
+        .parserSerializedATN(parserATN)
+        .lexerRuleNames(lexer.getRuleNames())
+        .parserRuleNames(parser.getRuleNames())
+        .channelNames(lexer.getChannelNames())
+        .modeNames(lexer.getModeNames())
+        .startRuleIndex(0)
+        .literalNames(literalNames)
+        .symbolicNames(symbolicNames)
+        .build();
   }
 
   private static String computeGrammarHash(int[] lexerATN, int[] parserATN) {

From 937375b3b32108ec0f31b2882a273b7ba89f4b5d Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Thu, 19 Feb 2026 14:07:50 -0800
Subject: [PATCH 05/20] modify tests

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../plugin/rest/RestPPLGrammarActionTest.java |  91 ++++++++--------
 .../PPLGrammarBundleBuilderTest.java          | 100 ++++++++++++++++++
 2 files changed, 149 insertions(+), 42 deletions(-)
 create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java

diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index 031b32ddb74..f58a8f0b5b4 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -10,28 +10,29 @@
 import static org.junit.Assert.assertTrue;
 import static org.mockito.Mockito.mock;
 
-import java.io.ByteArrayOutputStream;
-import java.nio.charset.StandardCharsets;
+import java.io.IOException;
 import org.junit.Before;
 import org.junit.Test;
+import org.opensearch.common.io.stream.BytesStreamOutput;
+import org.opensearch.common.xcontent.XContentType;
 import org.opensearch.core.rest.RestStatus;
+import org.opensearch.core.xcontent.MediaType;
 import org.opensearch.core.xcontent.NamedXContentRegistry;
+import org.opensearch.core.xcontent.XContentBuilder;
 import org.opensearch.rest.RestChannel;
 import org.opensearch.rest.RestRequest;
 import org.opensearch.rest.RestResponse;
-import org.opensearch.test.OpenSearchTestCase;
 import org.opensearch.test.rest.FakeRestRequest;
 import org.opensearch.transport.client.node.NodeClient;
 
 /** Unit tests for {@link RestPPLGrammarAction}. */
-public class RestPPLGrammarActionTest extends OpenSearchTestCase {
+public class RestPPLGrammarActionTest {
 
   private RestPPLGrammarAction action;
   private NodeClient client;
 
   @Before
-  public void setUp() throws Exception {
-    super.setUp();
+  public void setUp() {
     action = new RestPPLGrammarAction();
     client = mock(NodeClient.class);
   }
@@ -57,19 +58,22 @@ public void testGetGrammar_ReturnsBundle() throws Exception {
             .build();
 
     MockRestChannel channel = new MockRestChannel(request, true);
-    action.prepareRequest(request, client).accept(channel);
+    action.handleRequest(request, channel, client);
 
     RestResponse response = channel.getResponse();
     assertNotNull("Response should not be null", response);
     assertEquals("Should return 200 OK", RestStatus.OK, response.status());
 
-    String content = new String(response.content().array(), StandardCharsets.UTF_8);
-    assertTrue("Should contain bundleVersion", content.contains("\"bundleVersion\":"));
-    assertTrue("Should contain grammarHash", content.contains("\"grammarHash\":"));
-    assertTrue("Should contain lexerSerializedATN", content.contains("\"lexerSerializedATN\":"));
-    assertTrue("Should contain parserSerializedATN", content.contains("\"parserSerializedATN\":"));
-    assertTrue("Should contain literalNames", content.contains("\"literalNames\":"));
-    assertTrue("Should contain symbolicNames", content.contains("\"symbolicNames\":"));
+    String content = response.content().utf8ToString();
+    assertTrue(content.contains("\"bundleVersion\":\"1.0\""));
+    assertTrue(content.contains("\"grammarHash\":\"sha256:"));
+    assertTrue(content.contains("\"startRuleIndex\":0"));
+    assertTrue(content.contains("\"lexerSerializedATN\":"));
+    assertTrue(content.contains("\"parserSerializedATN\":"));
+    assertTrue(content.contains("\"lexerRuleNames\":"));
+    assertTrue(content.contains("\"parserRuleNames\":"));
+    assertTrue(content.contains("\"literalNames\":"));
+    assertTrue(content.contains("\"symbolicNames\":"));
   }
 
   @Test
@@ -79,30 +83,20 @@ public void testGetGrammar_BundleIsCached() throws Exception {
             .withMethod(RestRequest.Method.GET)
             .withPath("/_plugins/_ppl/_grammar")
             .build();
-
     MockRestChannel channel1 = new MockRestChannel(request1, true);
-    long startTime1 = System.currentTimeMillis();
-    action.prepareRequest(request1, client).accept(channel1);
-    long elapsed1 = System.currentTimeMillis() - startTime1;
+    action.handleRequest(request1, channel1, client);
 
     FakeRestRequest request2 =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
             .withMethod(RestRequest.Method.GET)
             .withPath("/_plugins/_ppl/_grammar")
             .build();
-
     MockRestChannel channel2 = new MockRestChannel(request2, true);
-    long startTime2 = System.currentTimeMillis();
-    action.prepareRequest(request2, client).accept(channel2);
-    long elapsed2 = System.currentTimeMillis() - startTime2;
-
-    assertTrue(
-        "Second request should be faster due to caching (elapsed1="
-            + elapsed1
-            + "ms, elapsed2="
-            + elapsed2
-            + "ms)",
-        elapsed2 < elapsed1 || elapsed2 < 50); // Allow some variance
+    action.handleRequest(request2, channel2, client);
+
+    String content1 = channel1.getResponse().content().utf8ToString();
+    String content2 = channel2.getResponse().content().utf8ToString();
+    assertEquals("Consecutive requests should return identical content", content1, content2);
   }
 
   @Test
@@ -112,9 +106,8 @@ public void testInvalidateCache() throws Exception {
             .withMethod(RestRequest.Method.GET)
             .withPath("/_plugins/_ppl/_grammar")
             .build();
-
     MockRestChannel channel1 = new MockRestChannel(request1, true);
-    action.prepareRequest(request1, client).accept(channel1);
+    action.handleRequest(request1, channel1, client);
     assertEquals(RestStatus.OK, channel1.getResponse().status());
 
     action.invalidateCache();
@@ -124,10 +117,13 @@ public void testInvalidateCache() throws Exception {
             .withMethod(RestRequest.Method.GET)
             .withPath("/_plugins/_ppl/_grammar")
             .build();
-
     MockRestChannel channel2 = new MockRestChannel(request2, true);
-    action.prepareRequest(request2, client).accept(channel2);
+    action.handleRequest(request2, channel2, client);
     assertEquals(RestStatus.OK, channel2.getResponse().status());
+
+    String content1 = channel1.getResponse().content().utf8ToString();
+    String content2 = channel2.getResponse().content().utf8ToString();
+    assertEquals("Grammar hash should be identical after cache invalidation and rebuild", content1, content2);
   }
 
   /** Mock RestChannel to capture responses */
@@ -161,23 +157,34 @@ public boolean detailedErrorsEnabled() {
     }
 
     @Override
-    public org.opensearch.core.xcontent.XContentBuilder newBuilder() {
-      return null;
+    public boolean detailedErrorStackTraceEnabled() {
+      return false;
     }
 
     @Override
-    public org.opensearch.core.xcontent.XContentBuilder newErrorBuilder() {
-      return null;
+    public XContentBuilder newBuilder() throws IOException {
+      return XContentBuilder.builder(XContentType.JSON.xContent());
     }
 
     @Override
-    public org.opensearch.core.xcontent.XContentBuilder newBuilder(
-        org.opensearch.core.xcontent.MediaType mediaType, boolean useFiltering) {
-      return null;
+    public XContentBuilder newErrorBuilder() throws IOException {
+      return XContentBuilder.builder(XContentType.JSON.xContent());
+    }
+
+    @Override
+    public XContentBuilder newBuilder(MediaType mediaType, boolean useFiltering) throws IOException {
+      return XContentBuilder.builder(XContentType.JSON.xContent());
+    }
+
+    @Override
+    public XContentBuilder newBuilder(
+        MediaType requestContentType, MediaType responseContentType, boolean useFiltering)
+        throws IOException {
+      return XContentBuilder.builder(XContentType.JSON.xContent());
     }
 
     @Override
-    public ByteArrayOutputStream bytesOutput() {
+    public BytesStreamOutput bytesOutput() {
       return null;
     }
   }
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
new file mode 100644
index 00000000000..c2501090ab0
--- /dev/null
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
@@ -0,0 +1,100 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.ppl.autocomplete;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.opensearch.sql.executor.autocomplete.GrammarBundle;
+
+public class PPLGrammarBundleBuilderTest {
+
+  private static GrammarBundle bundle;
+
+  @BeforeClass
+  public static void buildBundle() {
+    bundle = new PPLGrammarBundleBuilder().build();
+  }
+
+  @Test
+  public void bundleIsNotNull() {
+    assertNotNull(bundle);
+  }
+
+  @Test
+  public void bundleVersionIsSet() {
+    assertEquals("1.0", bundle.getBundleVersion());
+  }
+
+  @Test
+  public void grammarHashHasExpectedFormat() {
+    String hash = bundle.getGrammarHash();
+    assertNotNull(hash);
+    assertTrue("grammarHash should start with 'sha256:'", hash.startsWith("sha256:"));
+    assertTrue("grammarHash should be 71 chars (sha256: + 64 hex)", hash.length() == 71);
+  }
+
+  @Test
+  public void startRuleIndexIsZero() {
+    assertEquals(0, bundle.getStartRuleIndex());
+  }
+
+  @Test
+  public void lexerATNIsNonEmpty() {
+    assertNotNull(bundle.getLexerSerializedATN());
+    assertTrue(bundle.getLexerSerializedATN().length > 0);
+  }
+
+  @Test
+  public void parserATNIsNonEmpty() {
+    assertNotNull(bundle.getParserSerializedATN());
+    assertTrue(bundle.getParserSerializedATN().length > 0);
+  }
+
+  @Test
+  public void lexerRuleNamesAreNonEmpty() {
+    assertNotNull(bundle.getLexerRuleNames());
+    assertTrue(bundle.getLexerRuleNames().length > 0);
+  }
+
+  @Test
+  public void parserRuleNamesAreNonEmpty() {
+    assertNotNull(bundle.getParserRuleNames());
+    assertTrue(bundle.getParserRuleNames().length > 0);
+  }
+
+  @Test
+  public void channelNamesAreNonEmpty() {
+    assertNotNull(bundle.getChannelNames());
+    assertTrue(bundle.getChannelNames().length > 0);
+  }
+
+  @Test
+  public void modeNamesAreNonEmpty() {
+    assertNotNull(bundle.getModeNames());
+    assertTrue(bundle.getModeNames().length > 0);
+  }
+
+  @Test
+  public void vocabularyIsNonEmpty() {
+    assertNotNull(bundle.getLiteralNames());
+    assertNotNull(bundle.getSymbolicNames());
+    assertTrue(bundle.getLiteralNames().length > 0);
+    assertTrue(bundle.getSymbolicNames().length > 0);
+  }
+
+  @Test
+  public void buildIsDeterministic() {
+    GrammarBundle second = new PPLGrammarBundleBuilder().build();
+    assertEquals(
+        "Two builds of the same grammar should produce the same hash",
+        bundle.getGrammarHash(),
+        second.getGrammarHash());
+  }
+}

From c7d1b8978c231c6632cc7e29232de85a26924376 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Thu, 19 Feb 2026 14:42:20 -0800
Subject: [PATCH 06/20] a few mode cleanup

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../sql/plugin/rest/RestPPLGrammarAction.java | 63 ++++++-------------
 .../plugin/rest/RestPPLGrammarActionTest.java | 48 ++++++++++++--
 .../sql/ppl}/autocomplete/GrammarBundle.java  | 25 ++++----
 .../autocomplete/PPLGrammarBundleBuilder.java |  8 ++-
 .../PPLGrammarBundleBuilderTest.java          |  3 +-
 5 files changed, 81 insertions(+), 66 deletions(-)
 rename {core/src/main/java/org/opensearch/sql/executor => ppl/src/main/java/org/opensearch/sql/ppl}/autocomplete/GrammarBundle.java (59%)

diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index 5cc8c72a631..466dfced2ce 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -7,6 +7,7 @@
 
 import static org.opensearch.rest.RestRequest.Method.GET;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableList;
 import java.io.IOException;
 import java.util.List;
@@ -16,7 +17,7 @@
 import org.opensearch.rest.BaseRestHandler;
 import org.opensearch.rest.BytesRestResponse;
 import org.opensearch.rest.RestRequest;
-import org.opensearch.sql.executor.autocomplete.GrammarBundle;
+import org.opensearch.sql.ppl.autocomplete.GrammarBundle;
 import org.opensearch.sql.ppl.autocomplete.PPLGrammarBundleBuilder;
 import org.opensearch.transport.client.node.NodeClient;
 
@@ -47,12 +48,9 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
     return channel -> {
       try {
         GrammarBundle bundle = getOrBuildBundle();
-
         XContentBuilder builder = channel.newBuilder();
         serializeBundle(builder, bundle);
-
         channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder));
-
       } catch (Exception e) {
         log.error("Error building or serializing PPL grammar", e);
         channel.sendResponse(new BytesRestResponse(channel, RestStatus.INTERNAL_SERVER_ERROR, e));
@@ -68,29 +66,27 @@ private GrammarBundle getOrBuildBundle() {
     synchronized (bundleLock) {
       // double-check after acquiring lock
       if (cachedBundle == null) {
-        log.info("Building PPL grammar bundle (first request)...");
-        long startTime = System.currentTimeMillis();
-
-        PPLGrammarBundleBuilder builder = new PPLGrammarBundleBuilder();
-        cachedBundle = builder.build();
-
-        long elapsed = System.currentTimeMillis() - startTime;
-        log.info("Built PPL grammar in {}ms (hash: {})", elapsed, cachedBundle.getGrammarHash());
+        cachedBundle = buildBundle();
       }
       return cachedBundle;
     }
   }
 
+  /** Constructs the grammar bundle. Override in tests to inject a custom or failing builder. */
+  @VisibleForTesting
+  protected GrammarBundle buildBundle() {
+    return new PPLGrammarBundleBuilder().build();
+  }
+
   /** Invalidate the cached bundle, forcing a rebuild on the next request. */
+  @VisibleForTesting
   public void invalidateCache() {
     synchronized (bundleLock) {
-      log.info("Invalidating cached PPL grammar bundle");
       cachedBundle = null;
     }
   }
 
-  private void serializeBundle(XContentBuilder builder, GrammarBundle bundle)
-      throws IOException {
+  private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) throws IOException {
     builder.startObject();
 
     // Identity & versioning
@@ -99,39 +95,18 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle)
     builder.field("startRuleIndex", bundle.getStartRuleIndex());
 
     // Lexer ATN & metadata
-    if (bundle.getLexerSerializedATN() != null) {
-      builder.field("lexerSerializedATN", bundle.getLexerSerializedATN());
-    }
-
-    if (bundle.getLexerRuleNames() != null) {
-      builder.field("lexerRuleNames", bundle.getLexerRuleNames());
-    }
-
-    if (bundle.getChannelNames() != null) {
-      builder.field("channelNames", bundle.getChannelNames());
-    }
-
-    if (bundle.getModeNames() != null) {
-      builder.field("modeNames", bundle.getModeNames());
-    }
+    builder.field("lexerSerializedATN", bundle.getLexerSerializedATN());
+    builder.field("lexerRuleNames", bundle.getLexerRuleNames());
+    builder.field("channelNames", bundle.getChannelNames());
+    builder.field("modeNames", bundle.getModeNames());
 
     // Parser ATN & metadata
-    if (bundle.getParserSerializedATN() != null) {
-      builder.field("parserSerializedATN", bundle.getParserSerializedATN());
-    }
-
-    if (bundle.getParserRuleNames() != null) {
-      builder.field("parserRuleNames", bundle.getParserRuleNames());
-    }
+    builder.field("parserSerializedATN", bundle.getParserSerializedATN());
+    builder.field("parserRuleNames", bundle.getParserRuleNames());
 
     // Vocabulary
-    if (bundle.getLiteralNames() != null) {
-      builder.field("literalNames", bundle.getLiteralNames());
-    }
-
-    if (bundle.getSymbolicNames() != null) {
-      builder.field("symbolicNames", bundle.getSymbolicNames());
-    }
+    builder.field("literalNames", bundle.getLiteralNames());
+    builder.field("symbolicNames", bundle.getSymbolicNames());
 
     builder.endObject();
   }
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index f58a8f0b5b4..cf68bdab221 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -11,6 +11,7 @@
 import static org.mockito.Mockito.mock;
 
 import java.io.IOException;
+import java.util.concurrent.atomic.AtomicInteger;
 import org.junit.Before;
 import org.junit.Test;
 import org.opensearch.common.io.stream.BytesStreamOutput;
@@ -22,6 +23,7 @@
 import org.opensearch.rest.RestChannel;
 import org.opensearch.rest.RestRequest;
 import org.opensearch.rest.RestResponse;
+import org.opensearch.sql.ppl.autocomplete.GrammarBundle;
 import org.opensearch.test.rest.FakeRestRequest;
 import org.opensearch.transport.client.node.NodeClient;
 
@@ -78,13 +80,23 @@ public void testGetGrammar_ReturnsBundle() throws Exception {
 
   @Test
   public void testGetGrammar_BundleIsCached() throws Exception {
+    AtomicInteger buildCount = new AtomicInteger(0);
+    RestPPLGrammarAction countingAction =
+        new RestPPLGrammarAction() {
+          @Override
+          protected GrammarBundle buildBundle() {
+            buildCount.incrementAndGet();
+            return super.buildBundle();
+          }
+        };
+
     FakeRestRequest request1 =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
             .withMethod(RestRequest.Method.GET)
             .withPath("/_plugins/_ppl/_grammar")
             .build();
     MockRestChannel channel1 = new MockRestChannel(request1, true);
-    action.handleRequest(request1, channel1, client);
+    countingAction.handleRequest(request1, channel1, client);
 
     FakeRestRequest request2 =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
@@ -92,11 +104,13 @@ public void testGetGrammar_BundleIsCached() throws Exception {
             .withPath("/_plugins/_ppl/_grammar")
             .build();
     MockRestChannel channel2 = new MockRestChannel(request2, true);
-    action.handleRequest(request2, channel2, client);
+    countingAction.handleRequest(request2, channel2, client);
 
-    String content1 = channel1.getResponse().content().utf8ToString();
-    String content2 = channel2.getResponse().content().utf8ToString();
-    assertEquals("Consecutive requests should return identical content", content1, content2);
+    assertEquals("Bundle should be built exactly once", 1, buildCount.get());
+    assertEquals(
+        "Consecutive requests should return identical content",
+        channel1.getResponse().content().utf8ToString(),
+        channel2.getResponse().content().utf8ToString());
   }
 
   @Test
@@ -123,7 +137,29 @@ public void testInvalidateCache() throws Exception {
 
     String content1 = channel1.getResponse().content().utf8ToString();
     String content2 = channel2.getResponse().content().utf8ToString();
-    assertEquals("Grammar hash should be identical after cache invalidation and rebuild", content1, content2);
+    assertEquals(
+        "Grammar hash should be identical after cache invalidation and rebuild", content1, content2);
+  }
+
+  @Test
+  public void testGetGrammar_ErrorPath_Returns500() throws Exception {
+    RestPPLGrammarAction failingAction =
+        new RestPPLGrammarAction() {
+          @Override
+          protected GrammarBundle buildBundle() {
+            throw new RuntimeException("simulated build failure");
+          }
+        };
+
+    FakeRestRequest request =
+        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
+            .withMethod(RestRequest.Method.GET)
+            .withPath("/_plugins/_ppl/_grammar")
+            .build();
+    MockRestChannel channel = new MockRestChannel(request, true);
+    failingAction.handleRequest(request, channel, client);
+
+    assertEquals(RestStatus.INTERNAL_SERVER_ERROR, channel.getResponse().status());
   }
 
   /** Mock RestChannel to capture responses */
diff --git a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
similarity index 59%
rename from core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java
rename to ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
index 7b3720881aa..cd840d71e8c 100644
--- a/core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarBundle.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
@@ -3,46 +3,47 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-package org.opensearch.sql.executor.autocomplete;
+package org.opensearch.sql.ppl.autocomplete;
 
 import lombok.Builder;
 import lombok.Data;
+import lombok.NonNull;
 
-/** Response payload for the {@code GET /_plugins/_ppl/_grammar} endpoint. */
+/** Serialized ANTLR grammar data served by {@code GET /_plugins/_ppl/_grammar}. */
 @Data
 @Builder
 public class GrammarBundle {
 
   /** Bundle format version. */
-  private String bundleVersion;
+  @NonNull private String bundleVersion;
 
   /** SHA-256 hash of the serialized ATN data. Clients may use this to detect grammar changes. */
-  private String grammarHash;
+  @NonNull private String grammarHash;
 
   /** Serialized lexer ATN as int array (ATNSerializer output). */
-  private int[] lexerSerializedATN;
+  @NonNull private int[] lexerSerializedATN;
 
   /** Lexer rule names. */
-  private String[] lexerRuleNames;
+  @NonNull private String[] lexerRuleNames;
 
   /** Channel names (e.g. DEFAULT_TOKEN_CHANNEL, HIDDEN). */
-  private String[] channelNames;
+  @NonNull private String[] channelNames;
 
   /** Mode names (e.g. DEFAULT_MODE). */
-  private String[] modeNames;
+  @NonNull private String[] modeNames;
 
   /** Serialized parser ATN as int array (ATNSerializer output). */
-  private int[] parserSerializedATN;
+  @NonNull private int[] parserSerializedATN;
 
   /** Parser rule names. */
-  private String[] parserRuleNames;
+  @NonNull private String[] parserRuleNames;
 
   /** Start rule index (0 = root rule). */
   private int startRuleIndex;
 
   /** Literal token names indexed by token type (e.g. "'search'", "'|'"). */
-  private String[] literalNames;
+  @NonNull private String[] literalNames;
 
   /** Symbolic token names indexed by token type (e.g. "SEARCH", "PIPE"). */
-  private String[] symbolicNames;
+  @NonNull private String[] symbolicNames;
 }
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index a5ee5bd4e43..7e8edeb0d73 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -12,13 +12,13 @@
 import org.antlr.v4.runtime.CommonTokenStream;
 import org.antlr.v4.runtime.Vocabulary;
 import org.antlr.v4.runtime.atn.ATNSerializer;
-import org.opensearch.sql.executor.autocomplete.GrammarBundle;
 import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer;
 import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser;
 
 /** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */
 public class PPLGrammarBundleBuilder {
 
+  // Keep in sync with the antlr4 version in ppl/build.gradle.
   private static final String ANTLR_VERSION = "4.13.2";
   private static final String BUNDLE_VERSION = "1.0";
 
@@ -60,6 +60,8 @@ public GrammarBundle build() {
   private static String computeGrammarHash(int[] lexerATN, int[] parserATN) {
     try {
       MessageDigest digest = MessageDigest.getInstance("SHA-256");
+      // ANTLR4 serialized ATN values are bounded to 16 bits (unicode char range), so hashing
+      // 2 bytes per element captures the full value without loss.
       for (int v : lexerATN) {
         digest.update((byte) (v >> 8));
         digest.update((byte) v);
@@ -70,7 +72,9 @@ private static String computeGrammarHash(int[] lexerATN, int[] parserATN) {
       }
       digest.update(ANTLR_VERSION.getBytes(StandardCharsets.UTF_8));
       byte[] hash = digest.digest();
-      StringBuilder sb = new StringBuilder("sha256:");
+      // Output is always "sha256:" (7 chars) + 64 hex chars = 71 chars.
+      StringBuilder sb = new StringBuilder(71);
+      sb.append("sha256:");
       for (byte b : hash) {
         sb.append(String.format("%02x", b & 0xFF));
       }
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
index c2501090ab0..18e3cab01c2 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
@@ -11,7 +11,6 @@
 
 import org.junit.BeforeClass;
 import org.junit.Test;
-import org.opensearch.sql.executor.autocomplete.GrammarBundle;
 
 public class PPLGrammarBundleBuilderTest {
 
@@ -37,7 +36,7 @@ public void grammarHashHasExpectedFormat() {
     String hash = bundle.getGrammarHash();
     assertNotNull(hash);
     assertTrue("grammarHash should start with 'sha256:'", hash.startsWith("sha256:"));
-    assertTrue("grammarHash should be 71 chars (sha256: + 64 hex)", hash.length() == 71);
+    assertEquals("grammarHash should be 71 chars (sha256: + 64 hex)", 71, hash.length());
   }
 
   @Test

From 6ab349a267cb7d1ef9a45e844040bee1afa42373 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Fri, 20 Feb 2026 14:25:15 -0800
Subject: [PATCH 07/20] Read ANTLR version from runtime

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../ppl/autocomplete/PPLGrammarBundleBuilder.java | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index 7e8edeb0d73..7efb4e1b207 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -17,19 +17,20 @@
 
 /** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */
 public class PPLGrammarBundleBuilder {
-
-  // Keep in sync with the antlr4 version in ppl/build.gradle.
-  private static final String ANTLR_VERSION = "4.13.2";
+  private static final String ANTLR_VERSION = getAntlrVersion();
   private static final String BUNDLE_VERSION = "1.0";
 
+  private static String getAntlrVersion() {
+    Package antlrPackage = org.antlr.v4.runtime.RuntimeMetaData.class.getPackage();
+    String version = antlrPackage.getImplementationVersion();
+    return version != null ? version : "unknown";
+  }
+
   public GrammarBundle build() {
     OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString(""));
     CommonTokenStream tokens = new CommonTokenStream(lexer);
     OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens);
 
-    // ATNSerializer re-serializes the ATN into the int[] format expected by antlr4ng.
-    // Do not use lexer.getSerializedATN().chars().toArray() — that yields raw UTF-16 char values
-    // which cause "state type 65535 is not valid" errors in the frontend deserializer.
     int[] lexerATN = new ATNSerializer(lexer.getATN()).serialize().toArray();
     int[] parserATN = new ATNSerializer(parser.getATN()).serialize().toArray();
 
@@ -60,8 +61,6 @@ public GrammarBundle build() {
   private static String computeGrammarHash(int[] lexerATN, int[] parserATN) {
     try {
       MessageDigest digest = MessageDigest.getInstance("SHA-256");
-      // ANTLR4 serialized ATN values are bounded to 16 bits (unicode char range), so hashing
-      // 2 bytes per element captures the full value without loss.
       for (int v : lexerATN) {
         digest.update((byte) (v >> 8));
         digest.update((byte) v);

From c6e4b55ba382d8b6a3a28f7185ba6d6a56e1017a Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Fri, 20 Feb 2026 14:45:20 -0800
Subject: [PATCH 08/20] add antlr version

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java   | 1 +
 .../opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java   | 1 +
 .../org/opensearch/sql/ppl/autocomplete/GrammarBundle.java     | 3 +++
 .../sql/ppl/autocomplete/PPLGrammarBundleBuilder.java          | 1 +
 4 files changed, 6 insertions(+)

diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index 466dfced2ce..aecdab7b9f3 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -91,6 +91,7 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) thro
 
     // Identity & versioning
     builder.field("bundleVersion", bundle.getBundleVersion());
+    builder.field("antlrVersion", bundle.getAntlrVersion());
     builder.field("grammarHash", bundle.getGrammarHash());
     builder.field("startRuleIndex", bundle.getStartRuleIndex());
 
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index cf68bdab221..535f706f0b8 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -68,6 +68,7 @@ public void testGetGrammar_ReturnsBundle() throws Exception {
 
     String content = response.content().utf8ToString();
     assertTrue(content.contains("\"bundleVersion\":\"1.0\""));
+    assertTrue(content.contains("\"antlrVersion\":"));
     assertTrue(content.contains("\"grammarHash\":\"sha256:"));
     assertTrue(content.contains("\"startRuleIndex\":0"));
     assertTrue(content.contains("\"lexerSerializedATN\":"));
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
index cd840d71e8c..7492ac6c17e 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
@@ -17,6 +17,9 @@ public class GrammarBundle {
   /** Bundle format version. */
   @NonNull private String bundleVersion;
 
+  /** ANTLR runtime version used to generate the grammar. */
+  @NonNull private String antlrVersion;
+
   /** SHA-256 hash of the serialized ATN data. Clients may use this to detect grammar changes. */
   @NonNull private String grammarHash;
 
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index 7efb4e1b207..194f85e6a10 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -45,6 +45,7 @@ public GrammarBundle build() {
 
     return GrammarBundle.builder()
         .bundleVersion(BUNDLE_VERSION)
+        .antlrVersion(ANTLR_VERSION)
         .grammarHash(computeGrammarHash(lexerATN, parserATN))
         .lexerSerializedATN(lexerATN)
         .parserSerializedATN(parserATN)

From 67fd690c372c0cb3285a65492f7f2b22743e6786 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Fri, 20 Feb 2026 15:00:06 -0800
Subject: [PATCH 09/20] spotless fix

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../sql/plugin/rest/RestPPLGrammarActionTest.java          | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index 535f706f0b8..95825df39f9 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -139,7 +139,9 @@ public void testInvalidateCache() throws Exception {
     String content1 = channel1.getResponse().content().utf8ToString();
     String content2 = channel2.getResponse().content().utf8ToString();
     assertEquals(
-        "Grammar hash should be identical after cache invalidation and rebuild", content1, content2);
+        "Grammar hash should be identical after cache invalidation and rebuild",
+        content1,
+        content2);
   }
 
   @Test
@@ -209,7 +211,8 @@ public XContentBuilder newErrorBuilder() throws IOException {
     }
 
     @Override
-    public XContentBuilder newBuilder(MediaType mediaType, boolean useFiltering) throws IOException {
+    public XContentBuilder newBuilder(MediaType mediaType, boolean useFiltering)
+        throws IOException {
       return XContentBuilder.builder(XContentType.JSON.xContent());
     }
 

From 19d2884247d843bced033d54f57693c1158f95f3 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Sun, 22 Feb 2026 22:16:45 -0800
Subject: [PATCH 10/20] Address PR review: fix hash truncation, antlrVersion
 API, immutability, and tests

- Hash full 32-bit ints in grammarHash to avoid collisions with ANTLR 4.13.2 ATN serialization
- Use RuntimeMetaData.getRuntimeVersion() instead of unreliable JAR manifest lookup
- Make GrammarBundle immutable with @Value instead of @Data
- Update THIRD-PARTY to reflect ANTLR 4.13.2
- Harden tests with JSON parsing and add antlrVersion assertion

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 THIRD-PARTY                                   |  6 ++--
 .../plugin/rest/RestPPLGrammarActionTest.java | 36 +++++++++++++------
 .../sql/ppl/autocomplete/GrammarBundle.java   |  4 +--
 .../autocomplete/PPLGrammarBundleBuilder.java | 28 +++++++--------
 .../PPLGrammarBundleBuilderTest.java          |  9 +++++
 5 files changed, 53 insertions(+), 30 deletions(-)

diff --git a/THIRD-PARTY b/THIRD-PARTY
index 8469c53d5e5..4fe2762c7e8 100644
--- a/THIRD-PARTY
+++ b/THIRD-PARTY
@@ -467,15 +467,15 @@ DAMAGE.
 
 ------
 
-** ANTLR; version 4.7.1 -- https://github.com/antlr/antlr4
+** ANTLR; version 4.13.2 -- https://github.com/antlr/antlr4
 /*
- * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Copyright (c) 2012-2024 The ANTLR Project. All rights reserved.
  * Use of this file is governed by the BSD 3-clause license that
  * can be found in the LICENSE.txt file in the project root.
  */
 
 [The "BSD 3-clause license"]
-Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+Copyright (c) 2012-2024 The ANTLR Project. All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index 95825df39f9..a7aa620219f 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -12,6 +12,7 @@
 
 import java.io.IOException;
 import java.util.concurrent.atomic.AtomicInteger;
+import org.json.JSONObject;
 import org.junit.Before;
 import org.junit.Test;
 import org.opensearch.common.io.stream.BytesStreamOutput;
@@ -67,16 +68,31 @@ public void testGetGrammar_ReturnsBundle() throws Exception {
     assertEquals("Should return 200 OK", RestStatus.OK, response.status());
 
     String content = response.content().utf8ToString();
-    assertTrue(content.contains("\"bundleVersion\":\"1.0\""));
-    assertTrue(content.contains("\"antlrVersion\":"));
-    assertTrue(content.contains("\"grammarHash\":\"sha256:"));
-    assertTrue(content.contains("\"startRuleIndex\":0"));
-    assertTrue(content.contains("\"lexerSerializedATN\":"));
-    assertTrue(content.contains("\"parserSerializedATN\":"));
-    assertTrue(content.contains("\"lexerRuleNames\":"));
-    assertTrue(content.contains("\"parserRuleNames\":"));
-    assertTrue(content.contains("\"literalNames\":"));
-    assertTrue(content.contains("\"symbolicNames\":"));
+    JSONObject json = new JSONObject(content);
+
+    // Identity & versioning
+    assertEquals("1.0", json.getString("bundleVersion"));
+    assertTrue(
+        "antlrVersion should be a version string",
+        json.getString("antlrVersion").matches("\\d+\\.\\d+.*"));
+    assertTrue(
+        "grammarHash should start with sha256:",
+        json.getString("grammarHash").startsWith("sha256:"));
+    assertEquals(0, json.getInt("startRuleIndex"));
+
+    // Lexer ATN & metadata (non-empty arrays)
+    assertTrue(json.getJSONArray("lexerSerializedATN").length() > 0);
+    assertTrue(json.getJSONArray("lexerRuleNames").length() > 0);
+    assertTrue(json.getJSONArray("channelNames").length() > 0);
+    assertTrue(json.getJSONArray("modeNames").length() > 0);
+
+    // Parser ATN & metadata (non-empty arrays)
+    assertTrue(json.getJSONArray("parserSerializedATN").length() > 0);
+    assertTrue(json.getJSONArray("parserRuleNames").length() > 0);
+
+    // Vocabulary (non-empty arrays)
+    assertTrue(json.getJSONArray("literalNames").length() > 0);
+    assertTrue(json.getJSONArray("symbolicNames").length() > 0);
   }
 
   @Test
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
index 7492ac6c17e..0b733475994 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
@@ -6,11 +6,11 @@
 package org.opensearch.sql.ppl.autocomplete;
 
 import lombok.Builder;
-import lombok.Data;
 import lombok.NonNull;
+import lombok.Value;
 
 /** Serialized ANTLR grammar data served by {@code GET /_plugins/_ppl/_grammar}. */
-@Data
+@Value
 @Builder
 public class GrammarBundle {
 
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index 194f85e6a10..c89ec7f1eed 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -17,15 +17,10 @@
 
 /** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */
 public class PPLGrammarBundleBuilder {
-  private static final String ANTLR_VERSION = getAntlrVersion();
+  private static final String ANTLR_VERSION =
+      org.antlr.v4.runtime.RuntimeMetaData.getRuntimeVersion();
   private static final String BUNDLE_VERSION = "1.0";
 
-  private static String getAntlrVersion() {
-    Package antlrPackage = org.antlr.v4.runtime.RuntimeMetaData.class.getPackage();
-    String version = antlrPackage.getImplementationVersion();
-    return version != null ? version : "unknown";
-  }
-
   public GrammarBundle build() {
     OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString(""));
     CommonTokenStream tokens = new CommonTokenStream(lexer);
@@ -62,14 +57,8 @@ public GrammarBundle build() {
   private static String computeGrammarHash(int[] lexerATN, int[] parserATN) {
     try {
       MessageDigest digest = MessageDigest.getInstance("SHA-256");
-      for (int v : lexerATN) {
-        digest.update((byte) (v >> 8));
-        digest.update((byte) v);
-      }
-      for (int v : parserATN) {
-        digest.update((byte) (v >> 8));
-        digest.update((byte) v);
-      }
+      updateDigest(digest, lexerATN);
+      updateDigest(digest, parserATN);
       digest.update(ANTLR_VERSION.getBytes(StandardCharsets.UTF_8));
       byte[] hash = digest.digest();
       // Output is always "sha256:" (7 chars) + 64 hex chars = 71 chars.
@@ -83,4 +72,13 @@ private static String computeGrammarHash(int[] lexerATN, int[] parserATN) {
       throw new IllegalStateException("SHA-256 not available", e);
     }
   }
+
+  private static void updateDigest(MessageDigest digest, int[] data) {
+    for (int v : data) {
+      digest.update((byte) (v >>> 24));
+      digest.update((byte) (v >>> 16));
+      digest.update((byte) (v >>> 8));
+      digest.update((byte) (v));
+    }
+  }
 }
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
index 18e3cab01c2..2083b63c212 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
@@ -31,6 +31,15 @@ public void bundleVersionIsSet() {
     assertEquals("1.0", bundle.getBundleVersion());
   }
 
+  @Test
+  public void antlrVersionIsSet() {
+    String version = bundle.getAntlrVersion();
+    assertNotNull("antlrVersion should not be null", version);
+    assertTrue(
+        "antlrVersion should look like a version string, got: " + version,
+        version.matches("\\d+\\.\\d+.*"));
+  }
+
   @Test
   public void grammarHashHasExpectedFormat() {
     String hash = bundle.getGrammarHash();

From f4f5700b727a29d3a70e8f54ff99197f7822a49b Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Sun, 22 Feb 2026 22:45:47 -0800
Subject: [PATCH 11/20] Mark grammar API as experimental

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../opensearch/sql/plugin/rest/RestPPLGrammarAction.java  | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index aecdab7b9f3..873c261cc4e 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -12,6 +12,7 @@
 import java.io.IOException;
 import java.util.List;
 import lombok.extern.log4j.Log4j2;
+import org.opensearch.common.annotation.ExperimentalApi;
 import org.opensearch.core.rest.RestStatus;
 import org.opensearch.core.xcontent.XContentBuilder;
 import org.opensearch.rest.BaseRestHandler;
@@ -21,7 +22,12 @@
 import org.opensearch.sql.ppl.autocomplete.PPLGrammarBundleBuilder;
 import org.opensearch.transport.client.node.NodeClient;
 
-/** REST handler for {@code GET /_plugins/_ppl/_grammar}. */
+/*
+ * REST handler for {@code GET /_plugins/_ppl/_grammar}.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
 @Log4j2
 public class RestPPLGrammarAction extends BaseRestHandler {
 

From 97079d5ba8b0991e48e7f70cee33a32299a47c62 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Sun, 22 Feb 2026 23:27:53 -0800
Subject: [PATCH 12/20] Address review: ATN v4 guard, startRuleIndex by name,
 test hardening

- Assert ATN serialization version 4 for both lexer and parser to enforce
  antlr4ng compatibility contract
- Resolve startRuleIndex by looking up "root" rule name instead of hardcoding 0
- Fix MockRestChannel.bytesOutput() to return real BytesStreamOutput
- Document nullable elements in literalNames/symbolicNames Javadoc
- Rename test methods to follow testXxx() convention per ppl/plugin modules

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../plugin/rest/RestPPLGrammarActionTest.java |  2 +-
 .../sql/ppl/autocomplete/GrammarBundle.java   | 10 ++++-
 .../autocomplete/PPLGrammarBundleBuilder.java |  8 +++-
 .../PPLGrammarBundleBuilderTest.java          | 44 +++++++++++++------
 4 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index a7aa620219f..ef10c7d6cbf 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -241,7 +241,7 @@ public XContentBuilder newBuilder(
 
     @Override
     public BytesStreamOutput bytesOutput() {
-      return null;
+      return new BytesStreamOutput();
     }
   }
 }
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
index 0b733475994..2c508b3853e 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
@@ -44,9 +44,15 @@ public class GrammarBundle {
   /** Start rule index (0 = root rule). */
   private int startRuleIndex;
 
-  /** Literal token names indexed by token type (e.g. "'search'", "'|'"). */
+  /**
+   * Literal token names indexed by token type (e.g. "'search'", "'|'"). Elements may be null for
+   * tokens with no literal form; clients must handle sparse arrays.
+   */
   @NonNull private String[] literalNames;
 
-  /** Symbolic token names indexed by token type (e.g. "SEARCH", "PIPE"). */
+  /**
+   * Symbolic token names indexed by token type (e.g. "SEARCH", "PIPE"). Elements may be null for
+   * tokens with no symbolic name; clients must handle sparse arrays.
+   */
   @NonNull private String[] symbolicNames;
 }
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index c89ec7f1eed..2ec5eada4f6 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -8,6 +8,7 @@
 import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
+import java.util.Arrays;
 import org.antlr.v4.runtime.CharStreams;
 import org.antlr.v4.runtime.CommonTokenStream;
 import org.antlr.v4.runtime.Vocabulary;
@@ -48,12 +49,17 @@ public GrammarBundle build() {
         .parserRuleNames(parser.getRuleNames())
         .channelNames(lexer.getChannelNames())
         .modeNames(lexer.getModeNames())
-        .startRuleIndex(0)
+        .startRuleIndex(resolveStartRuleIndex(parser.getRuleNames()))
         .literalNames(literalNames)
         .symbolicNames(symbolicNames)
         .build();
   }
 
+  private static int resolveStartRuleIndex(String[] ruleNames) {
+    int idx = Arrays.asList(ruleNames).indexOf("root");
+    return Math.max(idx, 0);
+  }
+
   private static String computeGrammarHash(int[] lexerATN, int[] parserATN) {
     try {
       MessageDigest digest = MessageDigest.getInstance("SHA-256");
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
index 2083b63c212..ddfc8876c62 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
@@ -14,6 +14,8 @@
 
 public class PPLGrammarBundleBuilderTest {
 
+  private static final int EXPECTED_ATN_SERIALIZATION_VERSION = 4;
+
   private static GrammarBundle bundle;
 
   @BeforeClass
@@ -22,17 +24,17 @@ public static void buildBundle() {
   }
 
   @Test
-  public void bundleIsNotNull() {
+  public void testBuildBundleNotNull() {
     assertNotNull(bundle);
   }
 
   @Test
-  public void bundleVersionIsSet() {
+  public void testBuildBundleVersionIsSet() {
     assertEquals("1.0", bundle.getBundleVersion());
   }
 
   @Test
-  public void antlrVersionIsSet() {
+  public void testBuildAntlrVersionIsSet() {
     String version = bundle.getAntlrVersion();
     assertNotNull("antlrVersion should not be null", version);
     assertTrue(
@@ -41,7 +43,7 @@ public void antlrVersionIsSet() {
   }
 
   @Test
-  public void grammarHashHasExpectedFormat() {
+  public void testBuildGrammarHashHasSha256Format() {
     String hash = bundle.getGrammarHash();
     assertNotNull(hash);
     assertTrue("grammarHash should start with 'sha256:'", hash.startsWith("sha256:"));
@@ -49,48 +51,64 @@ public void grammarHashHasExpectedFormat() {
   }
 
   @Test
-  public void startRuleIndexIsZero() {
+  public void testBuildStartRuleIndexIsZero() {
     assertEquals(0, bundle.getStartRuleIndex());
   }
 
   @Test
-  public void lexerATNIsNonEmpty() {
+  public void testBuildLexerATNIsNonEmpty() {
     assertNotNull(bundle.getLexerSerializedATN());
     assertTrue(bundle.getLexerSerializedATN().length > 0);
   }
 
   @Test
-  public void parserATNIsNonEmpty() {
+  public void testBuildLexerATNIsSerializationVersion4() {
+    assertEquals(
+        "Lexer ATN must be serialization version 4 for antlr4ng compatibility",
+        EXPECTED_ATN_SERIALIZATION_VERSION,
+        bundle.getLexerSerializedATN()[0]);
+  }
+
+  @Test
+  public void testBuildParserATNIsNonEmpty() {
     assertNotNull(bundle.getParserSerializedATN());
     assertTrue(bundle.getParserSerializedATN().length > 0);
   }
 
   @Test
-  public void lexerRuleNamesAreNonEmpty() {
+  public void testBuildParserATNIsSerializationVersion4() {
+    assertEquals(
+        "Parser ATN must be serialization version 4 for antlr4ng compatibility",
+        EXPECTED_ATN_SERIALIZATION_VERSION,
+        bundle.getParserSerializedATN()[0]);
+  }
+
+  @Test
+  public void testBuildLexerRuleNamesAreNonEmpty() {
     assertNotNull(bundle.getLexerRuleNames());
     assertTrue(bundle.getLexerRuleNames().length > 0);
   }
 
   @Test
-  public void parserRuleNamesAreNonEmpty() {
+  public void testBuildParserRuleNamesAreNonEmpty() {
     assertNotNull(bundle.getParserRuleNames());
     assertTrue(bundle.getParserRuleNames().length > 0);
   }
 
   @Test
-  public void channelNamesAreNonEmpty() {
+  public void testBuildChannelNamesAreNonEmpty() {
     assertNotNull(bundle.getChannelNames());
     assertTrue(bundle.getChannelNames().length > 0);
   }
 
   @Test
-  public void modeNamesAreNonEmpty() {
+  public void testBuildModeNamesAreNonEmpty() {
     assertNotNull(bundle.getModeNames());
     assertTrue(bundle.getModeNames().length > 0);
   }
 
   @Test
-  public void vocabularyIsNonEmpty() {
+  public void testBuildVocabularyIsNonEmpty() {
     assertNotNull(bundle.getLiteralNames());
     assertNotNull(bundle.getSymbolicNames());
     assertTrue(bundle.getLiteralNames().length > 0);
@@ -98,7 +116,7 @@ public void vocabularyIsNonEmpty() {
   }
 
   @Test
-  public void buildIsDeterministic() {
+  public void testBuildIsDeterministic() {
     GrammarBundle second = new PPLGrammarBundleBuilder().build();
     assertEquals(
         "Two builds of the same grammar should produce the same hash",

From 3c6748b54444c7f49624a402cd8db3630db90050 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Mon, 23 Feb 2026 11:55:51 -0800
Subject: [PATCH 13/20] Reduce invalidateCache() visibility from public to
 protected

Consistent with buildBundle() which is also @VisibleForTesting protected.

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index 873c261cc4e..b879987fc63 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -86,7 +86,7 @@ protected GrammarBundle buildBundle() {
 
   /** Invalidate the cached bundle, forcing a rebuild on the next request. */
   @VisibleForTesting
-  public void invalidateCache() {
+  protected void invalidateCache() {
     synchronized (bundleLock) {
       cachedBundle = null;
     }

From 9487e700037fc0df4df7aabe540fd2d9f6b9e259 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Sun, 1 Mar 2026 17:23:40 -0800
Subject: [PATCH 14/20] add more necessary fields

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../sql/plugin/rest/RestPPLGrammarAction.java |   5 +
 .../plugin/rest/RestPPLGrammarActionTest.java |   5 +
 .../sql/ppl/autocomplete/GrammarBundle.java   |  23 +++
 .../autocomplete/PPLGrammarBundleBuilder.java | 133 ++++++++++++++++++
 .../PPLGrammarBundleBuilderTest.java          |  36 +++++
 5 files changed, 202 insertions(+)

diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index b879987fc63..ca3442d0272 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -115,6 +115,11 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) thro
     builder.field("literalNames", bundle.getLiteralNames());
     builder.field("symbolicNames", bundle.getSymbolicNames());
 
+    // Autocomplete configuration
+    builder.field("tokenDictionary", bundle.getTokenDictionary());
+    builder.field("ignoredTokens", bundle.getIgnoredTokens());
+    builder.field("rulesToVisit", bundle.getRulesToVisit());
+
     builder.endObject();
   }
 }
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index ef10c7d6cbf..bc05d08192a 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -93,6 +93,11 @@ public void testGetGrammar_ReturnsBundle() throws Exception {
     // Vocabulary (non-empty arrays)
     assertTrue(json.getJSONArray("literalNames").length() > 0);
     assertTrue(json.getJSONArray("symbolicNames").length() > 0);
+
+    // Autocomplete configuration
+    assertTrue(json.getJSONObject("tokenDictionary").length() > 0);
+    assertTrue(json.getJSONArray("ignoredTokens").length() > 0);
+    assertTrue(json.getJSONArray("rulesToVisit").length() > 0);
   }
 
   @Test
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
index 2c508b3853e..f0a8c8968cc 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
@@ -5,6 +5,7 @@
 
 package org.opensearch.sql.ppl.autocomplete;
 
+import java.util.Map;
 import lombok.Builder;
 import lombok.NonNull;
 import lombok.Value;
@@ -55,4 +56,26 @@ public class GrammarBundle {
    * tokens with no symbolic name; clients must handle sparse arrays.
    */
   @NonNull private String[] symbolicNames;
+
+  /**
+   * Autocomplete token dictionary — maps semantic names used by the autocomplete enrichment logic
+   * (e.g. "SPACE", "PIPE", "SOURCE") to their token type IDs in this grammar. Clients use this
+   * to configure token-aware enrichment without hardcoding token IDs.
+   */
+  @NonNull private Map<String, Integer> tokenDictionary;
+
+  /**
+   * Token type IDs that should be ignored by CodeCompletionCore during candidate collection.
+   * These are tokens like functions, operators, and internal tokens that should not appear
+   * as direct keyword suggestions (e.g. AVG, COUNT, PIPE operators).
+   */
+  @NonNull private int[] ignoredTokens;
+
+  /**
+   * Parser rule indices that CodeCompletionCore should treat as preferred rules.
+   * When these rules are candidate alternatives, CodeCompletionCore reports them as rule
+   * candidates instead of expanding into their child tokens. The autocomplete enrichment
+   * uses these to trigger semantic suggestions (e.g. suggest fields, suggest tables).
+   */
+  @NonNull private int[] rulesToVisit;
 }
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index 2ec5eada4f6..d030d20e24e 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -8,7 +8,13 @@
 import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
 import org.antlr.v4.runtime.CharStreams;
 import org.antlr.v4.runtime.CommonTokenStream;
 import org.antlr.v4.runtime.Vocabulary;
@@ -52,9 +58,136 @@ public GrammarBundle build() {
         .startRuleIndex(resolveStartRuleIndex(parser.getRuleNames()))
         .literalNames(literalNames)
         .symbolicNames(symbolicNames)
+        .tokenDictionary(buildTokenDictionary(vocabulary))
+        .ignoredTokens(buildIgnoredTokens())
+        .rulesToVisit(buildRulesToVisit(parser.getRuleNames()))
         .build();
   }
 
+  /**
+   * Build the token dictionary — semantic name → token type ID mapping. Uses lexer constants
+   * since token type IDs are defined by the lexer. The frontend autocomplete enrichment uses
+   * these to identify tokens like SPACE, PIPE, SOURCE by name.
+   */
+  private static Map<String, Integer> buildTokenDictionary(Vocabulary vocabulary) {
+    Map<String, Integer> dict = new LinkedHashMap<>();
+    // SPACE token may not exist in this grammar (whitespace may be implicitly skipped).
+    // Resolve by searching symbolic names; use -1 if not found.
+    dict.put("WHITESPACE", OpenSearchPPLLexer.WHITESPACE);
+    dict.put("FROM", OpenSearchPPLLexer.FROM);
+    dict.put("OPENING_BRACKET", OpenSearchPPLLexer.LT_PRTHS);
+    dict.put("CLOSING_BRACKET", OpenSearchPPLLexer.RT_PRTHS);
+    dict.put("SEARCH", OpenSearchPPLLexer.SEARCH);
+    dict.put("SOURCE", OpenSearchPPLLexer.SOURCE);
+    dict.put("PIPE", OpenSearchPPLLexer.PIPE);
+    dict.put("ID", OpenSearchPPLLexer.ID);
+    dict.put("EQUAL", OpenSearchPPLLexer.EQUAL);
+    dict.put("IN", OpenSearchPPLLexer.IN);
+    dict.put("COMMA", OpenSearchPPLLexer.COMMA);
+    dict.put("BACKTICK_QUOTE", OpenSearchPPLLexer.BQUOTA_STRING);
+    dict.put("DOT", OpenSearchPPLLexer.DOT);
+    return dict;
+  }
+
+  /**
+   * Build the list of token type IDs to ignore for autocomplete. Mirrors the frontend
+   * getIgnoredTokens() logic: explicitly ignore AS/IN, then ignore two contiguous token ranges
+   * minus operatorsToInclude.
+   *
+   * <p>Range 1 (relevance/internal tokens): MATCH .. ERROR_RECOGNITION — covers relevance
+   * functions, search parameters, span literals, IDs, quoted strings, and error tokens.
+   *
+   * <p>Range 2 (keywords/functions/operators): CASE .. CAST — covers CASE/ELSE, IN, EXISTS,
+   * NOT/OR/AND/XOR, TRUE/FALSE, REGEXP, datetime parts, data type keywords, punctuation,
+   * aggregate functions, math/text/date functions, and CAST.
+   *
+   * <p>Tokens in {@code operatorsToInclude} are kept as suggestions even if they fall within
+   * an ignored range.
+   */
+  private static int[] buildIgnoredTokens() {
+    // Verify range boundaries match expected token IDs. If the grammar changes and
+    // shifts token ordinals, these assertions surface the problem at build time.
+    assert OpenSearchPPLParser.MATCH == 427
+        : "MATCH token ID shifted — update ignored range start";
+    assert OpenSearchPPLParser.ERROR_RECOGNITION == 488
+        : "ERROR_RECOGNITION token ID shifted — update ignored range end";
+    assert OpenSearchPPLParser.CASE == 142
+        : "CASE token ID shifted — update ignored range start";
+    assert OpenSearchPPLParser.CAST == 387
+        : "CAST token ID shifted — update ignored range end";
+
+    Set<Integer> operatorsToInclude = new HashSet<>(Arrays.asList(
+        OpenSearchPPLParser.PIPE, OpenSearchPPLParser.EQUAL, OpenSearchPPLParser.COMMA,
+        OpenSearchPPLParser.NOT_EQUAL, OpenSearchPPLParser.LESS, OpenSearchPPLParser.NOT_LESS,
+        OpenSearchPPLParser.GREATER, OpenSearchPPLParser.NOT_GREATER,
+        OpenSearchPPLParser.OR, OpenSearchPPLParser.AND,
+        OpenSearchPPLParser.LT_PRTHS, OpenSearchPPLParser.RT_PRTHS,
+        OpenSearchPPLParser.SPAN,
+        OpenSearchPPLParser.MATCH, OpenSearchPPLParser.MATCH_PHRASE,
+        OpenSearchPPLParser.MATCH_BOOL_PREFIX, OpenSearchPPLParser.MATCH_PHRASE_PREFIX,
+        OpenSearchPPLParser.SQUOTA_STRING
+    ));
+
+    List<Integer> ignored = new ArrayList<>();
+    ignored.add(OpenSearchPPLParser.AS);
+    ignored.add(OpenSearchPPLParser.IN);
+
+    // Range 1: MATCH .. ERROR_RECOGNITION
+    for (int i = OpenSearchPPLParser.MATCH; i <= OpenSearchPPLParser.ERROR_RECOGNITION; i++) {
+      if (!operatorsToInclude.contains(i)) {
+        ignored.add(i);
+      }
+    }
+
+    // Range 2: CASE .. CAST
+    for (int i = OpenSearchPPLParser.CASE; i <= OpenSearchPPLParser.CAST; i++) {
+      if (!operatorsToInclude.contains(i)) {
+        ignored.add(i);
+      }
+    }
+
+    return ignored.stream().mapToInt(Integer::intValue).toArray();
+  }
+
+  /**
+   * Build the list of parser rule indices for CodeCompletionCore preferredRules.
+   * These rules trigger semantic suggestions (suggest fields, tables, functions, etc.).
+   *
+   * @throws IllegalStateException if any expected rule name is not found in the parser grammar
+   */
+  private static int[] buildRulesToVisit(String[] ruleNames) {
+    List<String> ruleNamesToVisit = Arrays.asList(
+        "statsFunctionName", "takeAggFunction", "integerLiteral", "decimalLiteral",
+        "keywordsCanBeId", "renameClasue", "qualifiedName", "tableQualifiedName",
+        "wcQualifiedName", "positionFunctionName", "searchableKeyWord", "stringLiteral",
+        "searchCommand", "searchComparisonOperator", "comparisonOperator", "sqlLikeJoinType"
+    );
+
+    List<String> ruleNamesList = Arrays.asList(ruleNames);
+    int[] indices = new int[ruleNamesToVisit.size()];
+    for (int i = 0; i < ruleNamesToVisit.size(); i++) {
+      String name = ruleNamesToVisit.get(i);
+      int idx = ruleNamesList.indexOf(name);
+      if (idx < 0) {
+        throw new IllegalStateException(
+            "Parser rule '" + name + "' not found in grammar — "
+                + "was it renamed or removed from OpenSearchPPLParser.g4?");
+      }
+      indices[i] = idx;
+    }
+    return indices;
+  }
+
+  /** Resolve a token type ID from the vocabulary by symbolic name. Returns -1 if not found. */
+  private static int resolveTokenType(Vocabulary vocabulary, String name) {
+    for (int i = 0; i <= vocabulary.getMaxTokenType(); i++) {
+      if (name.equals(vocabulary.getSymbolicName(i))) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
   private static int resolveStartRuleIndex(String[] ruleNames) {
     int idx = Arrays.asList(ruleNames).indexOf("root");
     return Math.max(idx, 0);
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
index ddfc8876c62..49115db5143 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
@@ -9,8 +9,10 @@
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
+import java.util.Map;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser;
 
 public class PPLGrammarBundleBuilderTest {
 
@@ -123,4 +125,38 @@ public void testBuildIsDeterministic() {
         bundle.getGrammarHash(),
         second.getGrammarHash());
   }
+
+  @Test
+  public void testTokenDictionaryContainsExpectedEntries() {
+    Map<String, Integer> dict = bundle.getTokenDictionary();
+    assertNotNull(dict);
+    assertEquals((Integer) OpenSearchPPLParser.PIPE, dict.get("PIPE"));
+    assertEquals((Integer) OpenSearchPPLParser.SOURCE, dict.get("SOURCE"));
+    assertEquals((Integer) OpenSearchPPLParser.FROM, dict.get("FROM"));
+    assertEquals((Integer) OpenSearchPPLParser.EQUAL, dict.get("EQUAL"));
+    assertEquals((Integer) OpenSearchPPLParser.ID, dict.get("ID"));
+  }
+
+  @Test
+  public void testIgnoredTokensAreNonEmpty() {
+    assertNotNull(bundle.getIgnoredTokens());
+    assertTrue("ignoredTokens should not be empty", bundle.getIgnoredTokens().length > 0);
+  }
+
+  @Test
+  public void testRulesToVisitAreNonEmpty() {
+    assertNotNull(bundle.getRulesToVisit());
+    assertTrue("rulesToVisit should not be empty", bundle.getRulesToVisit().length > 0);
+  }
+
+  @Test
+  public void testIgnoredRangeBoundariesMatchGrammar() {
+    // These assertions mirror the runtime assertions in buildIgnoredTokens().
+    // If the grammar changes token ordinals, both this test and the builder assertions
+    // will flag the issue.
+    assertEquals("MATCH token ID", 427, OpenSearchPPLParser.MATCH);
+    assertEquals("ERROR_RECOGNITION token ID", 488, OpenSearchPPLParser.ERROR_RECOGNITION);
+    assertEquals("CASE token ID", 142, OpenSearchPPLParser.CASE);
+    assertEquals("CAST token ID", 387, OpenSearchPPLParser.CAST);
+  }
 }

From 2f42c0f44a2840b6b8d4091317b96f2811b08b5c Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Tue, 3 Mar 2026 06:29:55 -0800
Subject: [PATCH 15/20] adjusting ignore token set to be lexical/internal only

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../autocomplete/PPLGrammarBundleBuilder.java |  83 ++++++--------
 .../PPLGrammarBundleBuilderTest.java          | 107 ++++++++++++++++--
 2 files changed, 132 insertions(+), 58 deletions(-)

diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index d030d20e24e..fc3cdb6b510 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -27,6 +27,22 @@ public class PPLGrammarBundleBuilder {
   private static final String ANTLR_VERSION =
       org.antlr.v4.runtime.RuntimeMetaData.getRuntimeVersion();
   private static final String BUNDLE_VERSION = "1.0";
+  private static final Set<String> INTERNAL_NON_LITERAL_TOKENS =
+      new HashSet<>(
+          Arrays.asList(
+              "ID",
+              "NUMERIC_ID",
+              "ID_DATE_SUFFIX",
+              "CLUSTER",
+              "TIME_SNAP",
+              "SPANLENGTH",
+              "DECIMAL_SPANLENGTH",
+              "DQUOTA_STRING",
+              "SQUOTA_STRING",
+              "BQUOTA_STRING",
+              "LINE_COMMENT",
+              "BLOCK_COMMENT",
+              "ERROR_RECOGNITION"));
 
   public GrammarBundle build() {
     OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString(""));
@@ -59,7 +75,7 @@ public GrammarBundle build() {
         .literalNames(literalNames)
         .symbolicNames(symbolicNames)
         .tokenDictionary(buildTokenDictionary(vocabulary))
-        .ignoredTokens(buildIgnoredTokens())
+        .ignoredTokens(buildIgnoredTokens(vocabulary))
         .rulesToVisit(buildRulesToVisit(parser.getRuleNames()))
         .build();
   }
@@ -90,65 +106,32 @@ private static Map<String, Integer> buildTokenDictionary(Vocabulary vocabulary)
   }
 
   /**
-   * Build the list of token type IDs to ignore for autocomplete. Mirrors the frontend
-   * getIgnoredTokens() logic: explicitly ignore AS/IN, then ignore two contiguous token ranges
-   * minus operatorsToInclude.
+   * Build token type IDs to ignore for autocomplete.
    *
-   * <p>Range 1 (relevance/internal tokens): MATCH .. ERROR_RECOGNITION — covers relevance
-   * functions, search parameters, span literals, IDs, quoted strings, and error tokens.
-   *
-   * <p>Range 2 (keywords/functions/operators): CASE .. CAST — covers CASE/ELSE, IN, EXISTS,
-   * NOT/OR/AND/XOR, TRUE/FALSE, REGEXP, datetime parts, data type keywords, punctuation,
-   * aggregate functions, math/text/date functions, and CAST.
-   *
-   * <p>Tokens in {@code operatorsToInclude} are kept as suggestions even if they fall within
-   * an ignored range.
+   * <p>Only lexical/internal tokens are ignored (identifiers, literals, quoted-string tokens,
+   * comments, and error token). User-facing commands/functions/operators are intentionally kept so
+   * completion dynamically reflects grammar changes.
    */
-  private static int[] buildIgnoredTokens() {
-    // Verify range boundaries match expected token IDs. If the grammar changes and
-    // shifts token ordinals, these assertions surface the problem at build time.
-    assert OpenSearchPPLParser.MATCH == 427
-        : "MATCH token ID shifted — update ignored range start";
-    assert OpenSearchPPLParser.ERROR_RECOGNITION == 488
-        : "ERROR_RECOGNITION token ID shifted — update ignored range end";
-    assert OpenSearchPPLParser.CASE == 142
-        : "CASE token ID shifted — update ignored range start";
-    assert OpenSearchPPLParser.CAST == 387
-        : "CAST token ID shifted — update ignored range end";
-
-    Set<Integer> operatorsToInclude = new HashSet<>(Arrays.asList(
-        OpenSearchPPLParser.PIPE, OpenSearchPPLParser.EQUAL, OpenSearchPPLParser.COMMA,
-        OpenSearchPPLParser.NOT_EQUAL, OpenSearchPPLParser.LESS, OpenSearchPPLParser.NOT_LESS,
-        OpenSearchPPLParser.GREATER, OpenSearchPPLParser.NOT_GREATER,
-        OpenSearchPPLParser.OR, OpenSearchPPLParser.AND,
-        OpenSearchPPLParser.LT_PRTHS, OpenSearchPPLParser.RT_PRTHS,
-        OpenSearchPPLParser.SPAN,
-        OpenSearchPPLParser.MATCH, OpenSearchPPLParser.MATCH_PHRASE,
-        OpenSearchPPLParser.MATCH_BOOL_PREFIX, OpenSearchPPLParser.MATCH_PHRASE_PREFIX,
-        OpenSearchPPLParser.SQUOTA_STRING
-    ));
-
+  private static int[] buildIgnoredTokens(Vocabulary vocabulary) {
     List<Integer> ignored = new ArrayList<>();
-    ignored.add(OpenSearchPPLParser.AS);
-    ignored.add(OpenSearchPPLParser.IN);
 
-    // Range 1: MATCH .. ERROR_RECOGNITION
-    for (int i = OpenSearchPPLParser.MATCH; i <= OpenSearchPPLParser.ERROR_RECOGNITION; i++) {
-      if (!operatorsToInclude.contains(i)) {
-        ignored.add(i);
-      }
-    }
-
-    // Range 2: CASE .. CAST
-    for (int i = OpenSearchPPLParser.CASE; i <= OpenSearchPPLParser.CAST; i++) {
-      if (!operatorsToInclude.contains(i)) {
-        ignored.add(i);
+    for (int tokenType = 0; tokenType <= vocabulary.getMaxTokenType(); tokenType++) {
+      String symbolicName = vocabulary.getSymbolicName(tokenType);
+      if (isLexicalInternalToken(symbolicName)) {
+        ignored.add(tokenType);
       }
     }
 
     return ignored.stream().mapToInt(Integer::intValue).toArray();
   }
 
+  private static boolean isLexicalInternalToken(String symbolicName) {
+    if (symbolicName == null) {
+      return false;
+    }
+    return symbolicName.endsWith("_LITERAL") || INTERNAL_NON_LITERAL_TOKENS.contains(symbolicName);
+  }
+
   /**
    * Build the list of parser rule indices for CodeCompletionCore preferredRules.
    * These rules trigger semantic suggestions (suggest fields, tables, functions, etc.).
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
index 49115db5143..fb5dd4dac9b 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
@@ -6,10 +6,14 @@
 package org.opensearch.sql.ppl.autocomplete;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser;
@@ -17,6 +21,22 @@
 public class PPLGrammarBundleBuilderTest {
 
   private static final int EXPECTED_ATN_SERIALIZATION_VERSION = 4;
+  private static final Set<String> EXPECTED_IGNORED_NON_LITERAL_SYMBOLS =
+      new HashSet<>(
+          Arrays.asList(
+              "ID",
+              "NUMERIC_ID",
+              "ID_DATE_SUFFIX",
+              "CLUSTER",
+              "TIME_SNAP",
+              "SPANLENGTH",
+              "DECIMAL_SPANLENGTH",
+              "DQUOTA_STRING",
+              "SQUOTA_STRING",
+              "BQUOTA_STRING",
+              "LINE_COMMENT",
+              "BLOCK_COMMENT",
+              "ERROR_RECOGNITION"));
 
   private static GrammarBundle bundle;
 
@@ -150,13 +170,84 @@ public void testRulesToVisitAreNonEmpty() {
   }
 
   @Test
-  public void testIgnoredRangeBoundariesMatchGrammar() {
-    // These assertions mirror the runtime assertions in buildIgnoredTokens().
-    // If the grammar changes token ordinals, both this test and the builder assertions
-    // will flag the issue.
-    assertEquals("MATCH token ID", 427, OpenSearchPPLParser.MATCH);
-    assertEquals("ERROR_RECOGNITION token ID", 488, OpenSearchPPLParser.ERROR_RECOGNITION);
-    assertEquals("CASE token ID", 142, OpenSearchPPLParser.CASE);
-    assertEquals("CAST token ID", 387, OpenSearchPPLParser.CAST);
+  public void testIgnoredTokensContainOnlyLexicalInternalTokens() {
+    Set<Integer> ignored = ignoredTokenSet();
+    for (Integer tokenType : ignored) {
+      String symbol = bundle.getSymbolicNames()[tokenType];
+      assertTrue(
+          "ignoredTokens should contain only lexical/internal tokens, got: "
+              + symbol
+              + " ("
+              + tokenType
+              + ")",
+          symbol != null
+              && (symbol.endsWith("_LITERAL")
+                  || EXPECTED_IGNORED_NON_LITERAL_SYMBOLS.contains(symbol)));
+    }
+  }
+
+  @Test
+  public void testCommandAndKeywordTokensAreNotIgnored() {
+    Set<Integer> ignored = ignoredTokenSet();
+    assertFalse("LOOKUP should not be ignored", ignored.contains(OpenSearchPPLParser.LOOKUP));
+    assertFalse("REPLACE should not be ignored", ignored.contains(OpenSearchPPLParser.REPLACE));
+    assertFalse("REVERSE should not be ignored", ignored.contains(OpenSearchPPLParser.REVERSE));
+    assertFalse("MVCOMBINE should not be ignored", ignored.contains(OpenSearchPPLParser.MVCOMBINE));
+    assertFalse("MVEXPAND should not be ignored", ignored.contains(OpenSearchPPLParser.MVEXPAND));
+    assertFalse("LEFT should not be ignored", ignored.contains(OpenSearchPPLParser.LEFT));
+    assertFalse("RIGHT should not be ignored", ignored.contains(OpenSearchPPLParser.RIGHT));
+    assertFalse("AS should not be ignored", ignored.contains(OpenSearchPPLParser.AS));
+    assertFalse("IN should not be ignored", ignored.contains(OpenSearchPPLParser.IN));
+  }
+
+  @Test
+  public void testExpressionFunctionTokensAreNotIgnored() {
+    Set<Integer> ignored = ignoredTokenSet();
+    assertFalse("MVAPPEND should not be ignored", ignored.contains(OpenSearchPPLParser.MVAPPEND));
+    assertFalse("MVJOIN should not be ignored", ignored.contains(OpenSearchPPLParser.MVJOIN));
+    assertFalse("MVINDEX should not be ignored", ignored.contains(OpenSearchPPLParser.MVINDEX));
+  }
+
+  @Test
+  public void testNewerGrammarKeywordsAreNotIgnoredWhenPresent() {
+    // These tokens exist in newer grammar variants (for example graph lookup support).
+    // Keep this test tolerant so it works across branches with different grammar revisions.
+    assertTokenNotIgnoredIfPresent("GRAPHLOOKUP");
+    assertTokenNotIgnoredIfPresent("START_FIELD");
+    assertTokenNotIgnoredIfPresent("FROM_FIELD");
+    assertTokenNotIgnoredIfPresent("TO_FIELD");
+    assertTokenNotIgnoredIfPresent("MAX_DEPTH");
+    assertTokenNotIgnoredIfPresent("DEPTH_FIELD");
+    assertTokenNotIgnoredIfPresent("DIRECTION");
+    assertTokenNotIgnoredIfPresent("UNI");
+    assertTokenNotIgnoredIfPresent("BI");
+    assertTokenNotIgnoredIfPresent("SUPPORT_ARRAY");
+    assertTokenNotIgnoredIfPresent("BATCH_MODE");
+    assertTokenNotIgnoredIfPresent("USE_PIT");
+  }
+
+  private static Set<Integer> ignoredTokenSet() {
+    Set<Integer> ignored = new HashSet<>();
+    for (int tokenType : bundle.getIgnoredTokens()) {
+      ignored.add(tokenType);
+    }
+    return ignored;
+  }
+
+  private static void assertTokenNotIgnoredIfPresent(String symbolicTokenName) {
+    int tokenType = tokenTypeBySymbolicName(symbolicTokenName);
+    if (tokenType >= 0) {
+      assertFalse(symbolicTokenName + " should not be ignored", ignoredTokenSet().contains(tokenType));
+    }
+  }
+
+  private static int tokenTypeBySymbolicName(String symbolicTokenName) {
+    String[] symbols = bundle.getSymbolicNames();
+    for (int i = 0; i < symbols.length; i++) {
+      if (symbolicTokenName.equals(symbols[i])) {
+        return i;
+      }
+    }
+    return -1;
   }
 }

From 6dae31b50059552e0e725479e43a0f9056d1f151 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Tue, 3 Mar 2026 07:14:37 -0800
Subject: [PATCH 16/20] addressed fix-now comments

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 docs/user/ppl/interfaces/endpoint.md          | 32 ++++++
 .../rest-api-spec/api/ppl.grammar.json        | 18 ++++
 .../rest-api-spec/test/api/ppl.grammar.yml    | 18 ++++
 .../sql/plugin/rest/RestPPLGrammarAction.java | 27 ++---
 .../plugin/rest/RestPPLGrammarActionTest.java | 46 +++++++--
 .../sql/ppl/autocomplete/GrammarBundle.java   | 98 +++++++++++++++++--
 .../autocomplete/PPLGrammarBundleBuilder.java | 72 +++++++++++---
 .../PPLGrammarBundleBuilderTest.java          | 38 ++++++-
 8 files changed, 296 insertions(+), 53 deletions(-)
 create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/api/ppl.grammar.json
 create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/api/ppl.grammar.yml

diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md
index d5958ba3250..b52412880db 100644
--- a/docs/user/ppl/interfaces/endpoint.md
+++ b/docs/user/ppl/interfaces/endpoint.md
@@ -201,3 +201,35 @@ Expected output (trimmed):
 - Plan node names use Calcite physical operator names (for example, `EnumerableCalc` or `CalciteEnumerableIndexScan`).
 - Plan `time_ms` is inclusive of child operators and represents wall-clock time; overlapping work can make summed plan times exceed `summary.total_time_ms`.
 - Scan nodes reflect operator wall-clock time; background prefetch can make scan time smaller than total request latency.
+
+## Grammar (Experimental)
+
+### Description
+
+You can send an HTTP GET request to endpoint **/_plugins/_ppl/_grammar** to fetch serialized PPL grammar metadata used by autocomplete clients.
+
+### Example
+
+```bash
+curl -sS -X GET localhost:9200/_plugins/_ppl/_grammar
+```
+
+Expected output (trimmed):
+
+```json
+{
+  "bundleVersion": "1.0",
+  "antlrVersion": "4.13.2",
+  "grammarHash": "sha256:...",
+  "startRuleIndex": 0,
+  "lexerSerializedATN": [4, ...],
+  "parserSerializedATN": [4, ...],
+  "lexerRuleNames": ["SEARCH", "..."],
+  "parserRuleNames": ["root", "..."],
+  "literalNames": [null, "'SEARCH'", "..."],
+  "symbolicNames": [null, "SEARCH", "..."],
+  "tokenDictionary": {"PIPE": 196, "...": 0},
+  "ignoredTokens": [472, 473, "..."],
+  "rulesToVisit": [200, 201, "..."]
+}
+```
diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/api/ppl.grammar.json b/integ-test/src/yamlRestTest/resources/rest-api-spec/api/ppl.grammar.json
new file mode 100644
index 00000000000..d2be3e0189d
--- /dev/null
+++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/api/ppl.grammar.json
@@ -0,0 +1,18 @@
+{
+  "ppl.grammar": {
+    "documentation": {
+      "url": "https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/interfaces/endpoint.md",
+      "description": "PPL Grammar Endpoint for Autocomplete"
+    },
+    "stability": "experimental",
+    "url": {
+      "paths": [
+        {
+          "path": "/_plugins/_ppl/_grammar",
+          "methods": ["GET"]
+        }
+      ]
+    },
+    "params": {}
+  }
+}
diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/api/ppl.grammar.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/api/ppl.grammar.yml
new file mode 100644
index 00000000000..bb1ade36bda
--- /dev/null
+++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/api/ppl.grammar.yml
@@ -0,0 +1,18 @@
+---
+"PPL grammar endpoint returns expected response shape":
+  - do:
+      ppl.grammar: {}
+  - is_true: bundleVersion
+  - is_true: antlrVersion
+  - is_true: grammarHash
+  - match: {startRuleIndex: 0}
+  - gt: {lexerSerializedATN.0: 0}
+  - is_true: lexerRuleNames.0
+  - is_true: channelNames.0
+  - is_true: modeNames.0
+  - gt: {parserSerializedATN.0: 0}
+  - is_true: parserRuleNames.0
+  - is_true: symbolicNames.1
+  - is_true: tokenDictionary.PIPE
+  - gt: {ignoredTokens.0: 0}
+  - gt: {rulesToVisit.0: 0}
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index ca3442d0272..ff29dfebeb6 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -34,8 +34,7 @@ public class RestPPLGrammarAction extends BaseRestHandler {
   private static final String ENDPOINT_PATH = "/_plugins/_ppl/_grammar";
 
   // Lazy-initialized singleton bundle (built once per JVM lifecycle)
-  private volatile GrammarBundle cachedBundle;
-  private final Object bundleLock = new Object();
+  private GrammarBundle cachedBundle;
 
   @Override
   public String getName() {
@@ -59,23 +58,17 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
         channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder));
       } catch (Exception e) {
         log.error("Error building or serializing PPL grammar", e);
-        channel.sendResponse(new BytesRestResponse(channel, RestStatus.INTERNAL_SERVER_ERROR, e));
+        channel.sendResponse(new BytesRestResponse(channel, e));
       }
     };
   }
 
-  // Thread-safe lazy initialization with double-checked locking.
-  private GrammarBundle getOrBuildBundle() {
-    if (cachedBundle != null) {
-      return cachedBundle;
-    }
-    synchronized (bundleLock) {
-      // double-check after acquiring lock
-      if (cachedBundle == null) {
-        cachedBundle = buildBundle();
-      }
-      return cachedBundle;
+  // Thread-safe lazy initialization.
+  private synchronized GrammarBundle getOrBuildBundle() {
+    if (cachedBundle == null) {
+      cachedBundle = buildBundle();
     }
+    return cachedBundle;
   }
 
   /** Constructs the grammar bundle. Override in tests to inject a custom or failing builder. */
@@ -86,10 +79,8 @@ protected GrammarBundle buildBundle() {
 
   /** Invalidate the cached bundle, forcing a rebuild on the next request. */
   @VisibleForTesting
-  protected void invalidateCache() {
-    synchronized (bundleLock) {
-      cachedBundle = null;
-    }
+  protected synchronized void invalidateCache() {
+    cachedBundle = null;
   }
 
   private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) throws IOException {
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index bc05d08192a..575c1db6b92 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -137,16 +137,27 @@ protected GrammarBundle buildBundle() {
 
   @Test
   public void testInvalidateCache() throws Exception {
+    AtomicInteger buildCount = new AtomicInteger(0);
+    RestPPLGrammarAction countingAction =
+        new RestPPLGrammarAction() {
+          @Override
+          protected GrammarBundle buildBundle() {
+            buildCount.incrementAndGet();
+            return super.buildBundle();
+          }
+        };
+
     FakeRestRequest request1 =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
             .withMethod(RestRequest.Method.GET)
             .withPath("/_plugins/_ppl/_grammar")
             .build();
     MockRestChannel channel1 = new MockRestChannel(request1, true);
-    action.handleRequest(request1, channel1, client);
+    countingAction.handleRequest(request1, channel1, client);
     assertEquals(RestStatus.OK, channel1.getResponse().status());
+    assertEquals("Bundle should be built on first request", 1, buildCount.get());
 
-    action.invalidateCache();
+    countingAction.invalidateCache();
 
     FakeRestRequest request2 =
         new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
@@ -154,15 +165,9 @@ public void testInvalidateCache() throws Exception {
             .withPath("/_plugins/_ppl/_grammar")
             .build();
     MockRestChannel channel2 = new MockRestChannel(request2, true);
-    action.handleRequest(request2, channel2, client);
+    countingAction.handleRequest(request2, channel2, client);
     assertEquals(RestStatus.OK, channel2.getResponse().status());
-
-    String content1 = channel1.getResponse().content().utf8ToString();
-    String content2 = channel2.getResponse().content().utf8ToString();
-    assertEquals(
-        "Grammar hash should be identical after cache invalidation and rebuild",
-        content1,
-        content2);
+    assertEquals("Bundle should be rebuilt after cache invalidation", 2, buildCount.get());
   }
 
   @Test
@@ -186,6 +191,27 @@ protected GrammarBundle buildBundle() {
     assertEquals(RestStatus.INTERNAL_SERVER_ERROR, channel.getResponse().status());
   }
 
+  @Test
+  public void testGetGrammar_NullBundle_Returns500() throws Exception {
+    RestPPLGrammarAction nullBundleAction =
+        new RestPPLGrammarAction() {
+          @Override
+          protected GrammarBundle buildBundle() {
+            return null;
+          }
+        };
+
+    FakeRestRequest request =
+        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
+            .withMethod(RestRequest.Method.GET)
+            .withPath("/_plugins/_ppl/_grammar")
+            .build();
+    MockRestChannel channel = new MockRestChannel(request, true);
+    nullBundleAction.handleRequest(request, channel, client);
+
+    assertEquals(RestStatus.INTERNAL_SERVER_ERROR, channel.getResponse().status());
+  }
+
   /** Mock RestChannel to capture responses */
   private static class MockRestChannel implements RestChannel {
     private final RestRequest request;
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
index f0a8c8968cc..552c3f39167 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
@@ -5,6 +5,9 @@
 
 package org.opensearch.sql.ppl.autocomplete;
 
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
 import java.util.Map;
 import lombok.Builder;
 import lombok.NonNull;
@@ -21,7 +24,10 @@ public class GrammarBundle {
   /** ANTLR runtime version used to generate the grammar. */
   @NonNull private String antlrVersion;
 
-  /** SHA-256 hash of the serialized ATN data. Clients may use this to detect grammar changes. */
+  /**
+   * SHA-256 hash of grammar metadata used by autocomplete (ATN, rule names, vocabulary, ANTLR
+   * version). Clients may use this to detect grammar changes.
+   */
   @NonNull private String grammarHash;
 
   /** Serialized lexer ATN as int array (ATNSerializer output). */
@@ -59,23 +65,95 @@ public class GrammarBundle {
 
   /**
    * Autocomplete token dictionary — maps semantic names used by the autocomplete enrichment logic
-   * (e.g. "SPACE", "PIPE", "SOURCE") to their token type IDs in this grammar. Clients use this
-   * to configure token-aware enrichment without hardcoding token IDs.
+   * (e.g. "SPACE", "PIPE", "SOURCE") to their token type IDs in this grammar. Clients use this to
+   * configure token-aware enrichment without hardcoding token IDs.
    */
   @NonNull private Map<String, Integer> tokenDictionary;
 
   /**
-   * Token type IDs that should be ignored by CodeCompletionCore during candidate collection.
-   * These are tokens like functions, operators, and internal tokens that should not appear
-   * as direct keyword suggestions (e.g. AVG, COUNT, PIPE operators).
+   * Token type IDs that should be ignored by CodeCompletionCore during candidate collection. These
+   * are lexical/internal tokens that should not appear as direct keyword suggestions.
    */
   @NonNull private int[] ignoredTokens;
 
   /**
-   * Parser rule indices that CodeCompletionCore should treat as preferred rules.
-   * When these rules are candidate alternatives, CodeCompletionCore reports them as rule
-   * candidates instead of expanding into their child tokens. The autocomplete enrichment
-   * uses these to trigger semantic suggestions (e.g. suggest fields, suggest tables).
+   * Parser rule indices that CodeCompletionCore should treat as preferred rules. When these rules
+   * are candidate alternatives, CodeCompletionCore reports them as rule candidates instead of
+   * expanding into their child tokens. The autocomplete enrichment uses these to trigger semantic
+   * suggestions (e.g. suggest fields, suggest tables).
    */
   @NonNull private int[] rulesToVisit;
+
+  public int[] getLexerSerializedATN() {
+    return copy(lexerSerializedATN);
+  }
+
+  public String[] getLexerRuleNames() {
+    return copy(lexerRuleNames);
+  }
+
+  public String[] getChannelNames() {
+    return copy(channelNames);
+  }
+
+  public String[] getModeNames() {
+    return copy(modeNames);
+  }
+
+  public int[] getParserSerializedATN() {
+    return copy(parserSerializedATN);
+  }
+
+  public String[] getParserRuleNames() {
+    return copy(parserRuleNames);
+  }
+
+  public String[] getLiteralNames() {
+    return copy(literalNames);
+  }
+
+  public String[] getSymbolicNames() {
+    return copy(symbolicNames);
+  }
+
+  public Map<String, Integer> getTokenDictionary() {
+    return Collections.unmodifiableMap(new LinkedHashMap<>(tokenDictionary));
+  }
+
+  public int[] getIgnoredTokens() {
+    return copy(ignoredTokens);
+  }
+
+  public int[] getRulesToVisit() {
+    return copy(rulesToVisit);
+  }
+
+  public static class GrammarBundleBuilder {
+    public GrammarBundle build() {
+      return new GrammarBundle(
+          bundleVersion,
+          antlrVersion,
+          grammarHash,
+          copy(lexerSerializedATN),
+          copy(lexerRuleNames),
+          copy(channelNames),
+          copy(modeNames),
+          copy(parserSerializedATN),
+          copy(parserRuleNames),
+          startRuleIndex,
+          copy(literalNames),
+          copy(symbolicNames),
+          Collections.unmodifiableMap(new LinkedHashMap<>(tokenDictionary)),
+          copy(ignoredTokens),
+          copy(rulesToVisit));
+    }
+  }
+
+  private static int[] copy(int[] values) {
+    return Arrays.copyOf(values, values.length);
+  }
+
+  private static String[] copy(String[] values) {
+    return Arrays.copyOf(values, values.length);
+  }
 }
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index fc3cdb6b510..8c5f7db70ec 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -64,7 +64,14 @@ public GrammarBundle build() {
     return GrammarBundle.builder()
         .bundleVersion(BUNDLE_VERSION)
         .antlrVersion(ANTLR_VERSION)
-        .grammarHash(computeGrammarHash(lexerATN, parserATN))
+        .grammarHash(
+            computeGrammarHash(
+                lexerATN,
+                parserATN,
+                lexer.getRuleNames(),
+                parser.getRuleNames(),
+                literalNames,
+                symbolicNames))
         .lexerSerializedATN(lexerATN)
         .parserSerializedATN(parserATN)
         .lexerRuleNames(lexer.getRuleNames())
@@ -81,9 +88,9 @@ public GrammarBundle build() {
   }
 
   /**
-   * Build the token dictionary — semantic name → token type ID mapping. Uses lexer constants
-   * since token type IDs are defined by the lexer. The frontend autocomplete enrichment uses
-   * these to identify tokens like SPACE, PIPE, SOURCE by name.
+   * Build the token dictionary — semantic name → token type ID mapping. Uses lexer constants since
+   * token type IDs are defined by the lexer. The frontend autocomplete enrichment uses these to
+   * identify tokens like SPACE, PIPE, SOURCE by name.
    */
   private static Map<String, Integer> buildTokenDictionary(Vocabulary vocabulary) {
     Map<String, Integer> dict = new LinkedHashMap<>();
@@ -133,18 +140,30 @@ private static boolean isLexicalInternalToken(String symbolicName) {
   }
 
   /**
-   * Build the list of parser rule indices for CodeCompletionCore preferredRules.
-   * These rules trigger semantic suggestions (suggest fields, tables, functions, etc.).
+   * Build the list of parser rule indices for CodeCompletionCore preferredRules. These rules
+   * trigger semantic suggestions (suggest fields, tables, functions, etc.).
    *
    * @throws IllegalStateException if any expected rule name is not found in the parser grammar
    */
   private static int[] buildRulesToVisit(String[] ruleNames) {
-    List<String> ruleNamesToVisit = Arrays.asList(
-        "statsFunctionName", "takeAggFunction", "integerLiteral", "decimalLiteral",
-        "keywordsCanBeId", "renameClasue", "qualifiedName", "tableQualifiedName",
-        "wcQualifiedName", "positionFunctionName", "searchableKeyWord", "stringLiteral",
-        "searchCommand", "searchComparisonOperator", "comparisonOperator", "sqlLikeJoinType"
-    );
+    List<String> ruleNamesToVisit =
+        Arrays.asList(
+            "statsFunctionName",
+            "takeAggFunction",
+            "integerLiteral",
+            "decimalLiteral",
+            "keywordsCanBeId",
+            "renameClasue",
+            "qualifiedName",
+            "tableQualifiedName",
+            "wcQualifiedName",
+            "positionFunctionName",
+            "searchableKeyWord",
+            "stringLiteral",
+            "searchCommand",
+            "searchComparisonOperator",
+            "comparisonOperator",
+            "sqlLikeJoinType");
 
     List<String> ruleNamesList = Arrays.asList(ruleNames);
     int[] indices = new int[ruleNamesToVisit.size()];
@@ -153,7 +172,9 @@ private static int[] buildRulesToVisit(String[] ruleNames) {
       int idx = ruleNamesList.indexOf(name);
       if (idx < 0) {
         throw new IllegalStateException(
-            "Parser rule '" + name + "' not found in grammar — "
+            "Parser rule '"
+                + name
+                + "' not found in grammar — "
                 + "was it renamed or removed from OpenSearchPPLParser.g4?");
       }
       indices[i] = idx;
@@ -176,11 +197,21 @@ private static int resolveStartRuleIndex(String[] ruleNames) {
     return Math.max(idx, 0);
   }
 
-  private static String computeGrammarHash(int[] lexerATN, int[] parserATN) {
+  private static String computeGrammarHash(
+      int[] lexerATN,
+      int[] parserATN,
+      String[] lexerRuleNames,
+      String[] parserRuleNames,
+      String[] literalNames,
+      String[] symbolicNames) {
     try {
       MessageDigest digest = MessageDigest.getInstance("SHA-256");
       updateDigest(digest, lexerATN);
       updateDigest(digest, parserATN);
+      updateDigest(digest, lexerRuleNames);
+      updateDigest(digest, parserRuleNames);
+      updateDigest(digest, literalNames);
+      updateDigest(digest, symbolicNames);
       digest.update(ANTLR_VERSION.getBytes(StandardCharsets.UTF_8));
       byte[] hash = digest.digest();
       // Output is always "sha256:" (7 chars) + 64 hex chars = 71 chars.
@@ -203,4 +234,17 @@ private static void updateDigest(MessageDigest digest, int[] data) {
       digest.update((byte) (v));
     }
   }
+
+  private static void updateDigest(MessageDigest digest, String[] data) {
+    for (String value : data) {
+      if (value == null) {
+        digest.update((byte) 0);
+      } else {
+        digest.update((byte) 1);
+        digest.update(value.getBytes(StandardCharsets.UTF_8));
+      }
+      // field separator to avoid concatenation ambiguities
+      digest.update((byte) 0xFF);
+    }
+  }
 }
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
index fb5dd4dac9b..590fbe76507 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
@@ -9,6 +9,7 @@
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.util.Arrays;
 import java.util.HashSet;
@@ -226,6 +227,40 @@ public void testNewerGrammarKeywordsAreNotIgnoredWhenPresent() {
     assertTokenNotIgnoredIfPresent("USE_PIT");
   }
 
+  @Test
+  public void testArrayGettersAreDefensiveCopies() {
+    int[] ignoredBefore = bundle.getIgnoredTokens();
+    int original = ignoredBefore[0];
+    ignoredBefore[0] = -1;
+    assertEquals(
+        "ignoredTokens getter should return a defensive copy",
+        original,
+        bundle.getIgnoredTokens()[0]);
+
+    String[] symbolsBefore = bundle.getSymbolicNames();
+    String originalSymbol = symbolsBefore[1];
+    symbolsBefore[1] = "MUTATED";
+    assertEquals(
+        "symbolicNames getter should return a defensive copy",
+        originalSymbol,
+        bundle.getSymbolicNames()[1]);
+  }
+
+  @Test
+  public void testTokenDictionaryGetterIsUnmodifiableCopy() {
+    Map<String, Integer> dict = bundle.getTokenDictionary();
+    Integer pipeBefore = dict.get("PIPE");
+
+    try {
+      dict.put("PIPE", -1);
+      fail("tokenDictionary getter should return an unmodifiable map");
+    } catch (UnsupportedOperationException expected) {
+      // expected
+    }
+
+    assertEquals(pipeBefore, bundle.getTokenDictionary().get("PIPE"));
+  }
+
   private static Set<Integer> ignoredTokenSet() {
     Set<Integer> ignored = new HashSet<>();
     for (int tokenType : bundle.getIgnoredTokens()) {
@@ -237,7 +272,8 @@ private static Set<Integer> ignoredTokenSet() {
   private static void assertTokenNotIgnoredIfPresent(String symbolicTokenName) {
     int tokenType = tokenTypeBySymbolicName(symbolicTokenName);
     if (tokenType >= 0) {
-      assertFalse(symbolicTokenName + " should not be ignored", ignoredTokenSet().contains(tokenType));
+      assertFalse(
+          symbolicTokenName + " should not be ignored", ignoredTokenSet().contains(tokenType));
     }
   }
 

From 2350fd77f3b8c5d01193c5b611b07bf1ad673fde Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Tue, 3 Mar 2026 07:23:42 -0800
Subject: [PATCH 17/20] fix test duplication

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../plugin/rest/RestPPLGrammarActionTest.java | 49 ++++++-------------
 1 file changed, 14 insertions(+), 35 deletions(-)

diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index 575c1db6b92..cb239ce33e1 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -54,11 +54,7 @@ public void testRoutes() {
 
   @Test
   public void testGetGrammar_ReturnsBundle() throws Exception {
-    FakeRestRequest request =
-        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
-            .withMethod(RestRequest.Method.GET)
-            .withPath("/_plugins/_ppl/_grammar")
-            .build();
+    FakeRestRequest request = newGrammarGetRequest();
 
     MockRestChannel channel = new MockRestChannel(request, true);
     action.handleRequest(request, channel, client);
@@ -112,19 +108,11 @@ protected GrammarBundle buildBundle() {
           }
         };
 
-    FakeRestRequest request1 =
-        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
-            .withMethod(RestRequest.Method.GET)
-            .withPath("/_plugins/_ppl/_grammar")
-            .build();
+    FakeRestRequest request1 = newGrammarGetRequest();
     MockRestChannel channel1 = new MockRestChannel(request1, true);
     countingAction.handleRequest(request1, channel1, client);
 
-    FakeRestRequest request2 =
-        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
-            .withMethod(RestRequest.Method.GET)
-            .withPath("/_plugins/_ppl/_grammar")
-            .build();
+    FakeRestRequest request2 = newGrammarGetRequest();
     MockRestChannel channel2 = new MockRestChannel(request2, true);
     countingAction.handleRequest(request2, channel2, client);
 
@@ -147,11 +135,7 @@ protected GrammarBundle buildBundle() {
           }
         };
 
-    FakeRestRequest request1 =
-        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
-            .withMethod(RestRequest.Method.GET)
-            .withPath("/_plugins/_ppl/_grammar")
-            .build();
+    FakeRestRequest request1 = newGrammarGetRequest();
     MockRestChannel channel1 = new MockRestChannel(request1, true);
     countingAction.handleRequest(request1, channel1, client);
     assertEquals(RestStatus.OK, channel1.getResponse().status());
@@ -159,11 +143,7 @@ protected GrammarBundle buildBundle() {
 
     countingAction.invalidateCache();
 
-    FakeRestRequest request2 =
-        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
-            .withMethod(RestRequest.Method.GET)
-            .withPath("/_plugins/_ppl/_grammar")
-            .build();
+    FakeRestRequest request2 = newGrammarGetRequest();
     MockRestChannel channel2 = new MockRestChannel(request2, true);
     countingAction.handleRequest(request2, channel2, client);
     assertEquals(RestStatus.OK, channel2.getResponse().status());
@@ -180,11 +160,7 @@ protected GrammarBundle buildBundle() {
           }
         };
 
-    FakeRestRequest request =
-        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
-            .withMethod(RestRequest.Method.GET)
-            .withPath("/_plugins/_ppl/_grammar")
-            .build();
+    FakeRestRequest request = newGrammarGetRequest();
     MockRestChannel channel = new MockRestChannel(request, true);
     failingAction.handleRequest(request, channel, client);
 
@@ -201,17 +177,20 @@ protected GrammarBundle buildBundle() {
           }
         };
 
-    FakeRestRequest request =
-        new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
-            .withMethod(RestRequest.Method.GET)
-            .withPath("/_plugins/_ppl/_grammar")
-            .build();
+    FakeRestRequest request = newGrammarGetRequest();
     MockRestChannel channel = new MockRestChannel(request, true);
     nullBundleAction.handleRequest(request, channel, client);
 
     assertEquals(RestStatus.INTERNAL_SERVER_ERROR, channel.getResponse().status());
   }
 
+  private static FakeRestRequest newGrammarGetRequest() {
+    return new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
+        .withMethod(RestRequest.Method.GET)
+        .withPath("/_plugins/_ppl/_grammar")
+        .build();
+  }
+
   /** Mock RestChannel to capture responses */
   private static class MockRestChannel implements RestChannel {
     private final RestRequest request;

From faac85bc0ba9c315368c89b31add06b547bc3395 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Tue, 3 Mar 2026 07:58:48 -0800
Subject: [PATCH 18/20] Polish grammar bundle builder and stabilize grammar
 endpoint doctest

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 docs/user/ppl/interfaces/endpoint.md          | 24 ++++++-------------
 .../autocomplete/PPLGrammarBundleBuilder.java | 18 +++-----------
 2 files changed, 10 insertions(+), 32 deletions(-)

diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md
index b52412880db..3f87180dca6 100644
--- a/docs/user/ppl/interfaces/endpoint.md
+++ b/docs/user/ppl/interfaces/endpoint.md
@@ -211,25 +211,15 @@ You can send an HTTP GET request to endpoint **/_plugins/_ppl/_grammar** to fetc
 ### Example
 
 ```bash
-curl -sS -X GET localhost:9200/_plugins/_ppl/_grammar
+curl -sS -X GET localhost:9200/_plugins/_ppl/_grammar | python3 -m json.tool | grep -E '"bundleVersion"|"antlrVersion"|"startRuleIndex"|"ignoredTokens"|"rulesToVisit"'
 ```
 
 Expected output (trimmed):
 
-```json
-{
-  "bundleVersion": "1.0",
-  "antlrVersion": "4.13.2",
-  "grammarHash": "sha256:...",
-  "startRuleIndex": 0,
-  "lexerSerializedATN": [4, ...],
-  "parserSerializedATN": [4, ...],
-  "lexerRuleNames": ["SEARCH", "..."],
-  "parserRuleNames": ["root", "..."],
-  "literalNames": [null, "'SEARCH'", "..."],
-  "symbolicNames": [null, "SEARCH", "..."],
-  "tokenDictionary": {"PIPE": 196, "...": 0},
-  "ignoredTokens": [472, 473, "..."],
-  "rulesToVisit": [200, 201, "..."]
-}
+```text
+"bundleVersion": "1.0",
+"antlrVersion": "4.13.2",
+"startRuleIndex": 0,
+"ignoredTokens": [
+"rulesToVisit": [
 ```
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index 8c5f7db70ec..32d671b7e67 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -81,7 +81,7 @@ public GrammarBundle build() {
         .startRuleIndex(resolveStartRuleIndex(parser.getRuleNames()))
         .literalNames(literalNames)
         .symbolicNames(symbolicNames)
-        .tokenDictionary(buildTokenDictionary(vocabulary))
+        .tokenDictionary(buildTokenDictionary())
         .ignoredTokens(buildIgnoredTokens(vocabulary))
         .rulesToVisit(buildRulesToVisit(parser.getRuleNames()))
         .build();
@@ -90,12 +90,10 @@ public GrammarBundle build() {
   /**
    * Build the token dictionary — semantic name → token type ID mapping. Uses lexer constants since
    * token type IDs are defined by the lexer. The frontend autocomplete enrichment uses these to
-   * identify tokens like SPACE, PIPE, SOURCE by name.
+   * identify tokens like PIPE and SOURCE by name.
    */
-  private static Map<String, Integer> buildTokenDictionary(Vocabulary vocabulary) {
+  private static Map<String, Integer> buildTokenDictionary() {
     Map<String, Integer> dict = new LinkedHashMap<>();
-    // SPACE token may not exist in this grammar (whitespace may be implicitly skipped).
-    // Resolve by searching symbolic names; use -1 if not found.
     dict.put("WHITESPACE", OpenSearchPPLLexer.WHITESPACE);
     dict.put("FROM", OpenSearchPPLLexer.FROM);
     dict.put("OPENING_BRACKET", OpenSearchPPLLexer.LT_PRTHS);
@@ -182,16 +180,6 @@ private static int[] buildRulesToVisit(String[] ruleNames) {
     return indices;
   }
 
-  /** Resolve a token type ID from the vocabulary by symbolic name. Returns -1 if not found. */
-  private static int resolveTokenType(Vocabulary vocabulary, String name) {
-    for (int i = 0; i <= vocabulary.getMaxTokenType(); i++) {
-      if (name.equals(vocabulary.getSymbolicName(i))) {
-        return i;
-      }
-    }
-    return -1;
-  }
-
   private static int resolveStartRuleIndex(String[] ruleNames) {
     int idx = Arrays.asList(ruleNames).indexOf("root");
     return Math.max(idx, 0);

From 2c5b14ebb57e0cb6fd695473bbf05bc0bfb9df6e Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Tue, 3 Mar 2026 09:03:21 -0800
Subject: [PATCH 19/20] address issue: transport action wrapper

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../sql/security/PPLPermissionsIT.java        | 43 +++++++++++++++
 .../sql/plugin/rest/RestPPLGrammarAction.java | 50 ++++++++++++++---
 .../transport/TransportPPLQueryAction.java    | 10 +++-
 .../transport/TransportPPLQueryRequest.java   | 11 +++-
 .../plugin/rest/RestPPLGrammarActionTest.java | 54 ++++++++++++++++++-
 5 files changed, 159 insertions(+), 9 deletions(-)

diff --git a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java
index d84ca2af1b3..ed25a1df2d9 100644
--- a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java
@@ -339,6 +339,23 @@ private JSONObject executeQueryAsUser(String query, String username) throws IOEx
     return new JSONObject(org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true));
   }
 
+  /** Executes a grammar metadata request as a specific user with basic authentication. */
+  private JSONObject executeGrammarAsUser(String username) throws IOException {
+    Request request = new Request("GET", "/_plugins/_ppl/_grammar");
+
+    RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder();
+    restOptionsBuilder.addHeader(
+        "Authorization",
+        "Basic "
+            + java.util.Base64.getEncoder()
+                .encodeToString((username + ":" + STRONG_PASSWORD).getBytes()));
+    request.setOptions(restOptionsBuilder);
+
+    Response response = client().performRequest(request);
+    assertEquals(200, response.getStatusLine().getStatusCode());
+    return new JSONObject(org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true));
+  }
+
   @Test
   public void testUserWithBankPermissionCanAccessBankIndex() throws IOException {
     // Test that bank_user can access bank index - this should work with the fix
@@ -512,6 +529,32 @@ public void testBankUserWithEvalCommand() throws IOException {
     verifyColumn(result, columnName("full_name"));
   }
 
+  @Test
+  public void testUserWithPPLPermissionCanAccessGrammarEndpoint() throws IOException {
+    JSONObject result = executeGrammarAsUser(BANK_USER);
+    assertTrue(result.has("bundleVersion"));
+    assertTrue(result.has("antlrVersion"));
+    assertTrue(result.has("grammarHash"));
+    assertTrue(result.has("tokenDictionary"));
+  }
+
+  @Test
+  public void testUserWithoutPPLPermissionCannotAccessGrammarEndpoint() throws IOException {
+    try {
+      executeGrammarAsUser(NO_PPL_USER);
+      fail("Expected security exception for user without PPL permission");
+    } catch (ResponseException e) {
+      assertEquals(403, e.getResponse().getStatusLine().getStatusCode());
+      String responseBody =
+          org.opensearch.sql.legacy.TestUtils.getResponseBody(e.getResponse(), false);
+      assertTrue(
+          "Response should contain permission error message",
+          responseBody.contains("no permissions")
+              || responseBody.contains("Forbidden")
+              || responseBody.contains("cluster:admin/opensearch/ppl"));
+    }
+  }
+
   // Negative test cases for missing permissions
 
   @Test
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index ff29dfebeb6..5c1b128572e 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -13,11 +13,16 @@
 import java.util.List;
 import lombok.extern.log4j.Log4j2;
 import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.core.action.ActionListener;
 import org.opensearch.core.rest.RestStatus;
 import org.opensearch.core.xcontent.XContentBuilder;
 import org.opensearch.rest.BaseRestHandler;
 import org.opensearch.rest.BytesRestResponse;
+import org.opensearch.rest.RestChannel;
 import org.opensearch.rest.RestRequest;
+import org.opensearch.sql.plugin.transport.PPLQueryAction;
+import org.opensearch.sql.plugin.transport.TransportPPLQueryRequest;
+import org.opensearch.sql.plugin.transport.TransportPPLQueryResponse;
 import org.opensearch.sql.ppl.autocomplete.GrammarBundle;
 import org.opensearch.sql.ppl.autocomplete.PPLGrammarBundleBuilder;
 import org.opensearch.transport.client.node.NodeClient;
@@ -52,17 +57,50 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
 
     return channel -> {
       try {
-        GrammarBundle bundle = getOrBuildBundle();
-        XContentBuilder builder = channel.newBuilder();
-        serializeBundle(builder, bundle);
-        channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder));
+        authorizeRequest(
+            client,
+            new ActionListener<>() {
+              @Override
+              public void onResponse(TransportPPLQueryResponse ignored) {
+                try {
+                  GrammarBundle bundle = getOrBuildBundle();
+                  XContentBuilder builder = channel.newBuilder();
+                  serializeBundle(builder, bundle);
+                  channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder));
+                } catch (Exception e) {
+                  log.error("Error building or serializing PPL grammar", e);
+                  sendErrorResponse(channel, e);
+                }
+              }
+
+              @Override
+              public void onFailure(Exception e) {
+                log.error("PPL grammar authorization failed", e);
+                sendErrorResponse(channel, e);
+              }
+            });
       } catch (Exception e) {
-        log.error("Error building or serializing PPL grammar", e);
-        channel.sendResponse(new BytesRestResponse(channel, e));
+        log.error("Error authorizing PPL grammar request", e);
+        sendErrorResponse(channel, e);
       }
     };
   }
 
+  @VisibleForTesting
+  protected void authorizeRequest(
+      NodeClient client, ActionListener<TransportPPLQueryResponse> listener) {
+    client.execute(
+        PPLQueryAction.INSTANCE, new TransportPPLQueryRequest("", null, ENDPOINT_PATH), listener);
+  }
+
+  private void sendErrorResponse(RestChannel channel, Exception e) {
+    try {
+      channel.sendResponse(new BytesRestResponse(channel, e));
+    } catch (IOException ioException) {
+      log.error("Failed to send PPL grammar error response", ioException);
+    }
+  }
+
   // Thread-safe lazy initialization.
   private synchronized GrammarBundle getOrBuildBundle() {
     if (cachedBundle == null) {
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java
index 27bfe2084f7..48bc36374a8 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java
@@ -100,13 +100,21 @@ protected void doExecute(
                   + " false"));
       return;
     }
+
+    TransportPPLQueryRequest transportRequest = TransportPPLQueryRequest.fromActionRequest(request);
+    if (transportRequest.isGrammarRequest()) {
+      // Authorization is enforced by this transport action before returning grammar metadata in
+      // REST.
+      listener.onResponse(new TransportPPLQueryResponse("{}"));
+      return;
+    }
+
     Metrics.getInstance().getNumericalMetric(MetricName.PPL_REQ_TOTAL).increment();
     Metrics.getInstance().getNumericalMetric(MetricName.PPL_REQ_COUNT_TOTAL).increment();
 
     QueryContext.addRequestId();
 
     PPLService pplService = injector.getInstance(PPLService.class);
-    TransportPPLQueryRequest transportRequest = TransportPPLQueryRequest.fromActionRequest(request);
     // in order to use PPL service, we need to convert TransportPPLQueryRequest to PPLQueryRequest
     PPLQueryRequest transformedRequest = transportRequest.toPPLQueryRequest();
     QueryContext.setProfile(transformedRequest.profile());
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java
index e342d9a90f0..6db2bd249ae 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java
@@ -119,7 +119,16 @@ public String getRequest() {
    * @return true if it is an explain request
    */
   public boolean isExplainRequest() {
-    return path.endsWith("/_explain");
+    return path != null && path.endsWith("/_explain");
+  }
+
+  /**
+   * Check if request is for grammar metadata endpoint.
+   *
+   * @return true if it is a grammar metadata request
+   */
+  public boolean isGrammarRequest() {
+    return path != null && path.endsWith("/_grammar");
   }
 
   /** Decide on the formatter by the requested format. */
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index cb239ce33e1..92d56710c08 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -15,8 +15,10 @@
 import org.json.JSONObject;
 import org.junit.Before;
 import org.junit.Test;
+import org.opensearch.OpenSearchStatusException;
 import org.opensearch.common.io.stream.BytesStreamOutput;
 import org.opensearch.common.xcontent.XContentType;
+import org.opensearch.core.action.ActionListener;
 import org.opensearch.core.rest.RestStatus;
 import org.opensearch.core.xcontent.MediaType;
 import org.opensearch.core.xcontent.NamedXContentRegistry;
@@ -24,6 +26,7 @@
 import org.opensearch.rest.RestChannel;
 import org.opensearch.rest.RestRequest;
 import org.opensearch.rest.RestResponse;
+import org.opensearch.sql.plugin.transport.TransportPPLQueryResponse;
 import org.opensearch.sql.ppl.autocomplete.GrammarBundle;
 import org.opensearch.test.rest.FakeRestRequest;
 import org.opensearch.transport.client.node.NodeClient;
@@ -36,7 +39,14 @@ public class RestPPLGrammarActionTest {
 
   @Before
   public void setUp() {
-    action = new RestPPLGrammarAction();
+    action =
+        new RestPPLGrammarAction() {
+          @Override
+          protected void authorizeRequest(
+              NodeClient client, ActionListener<TransportPPLQueryResponse> listener) {
+            listener.onResponse(new TransportPPLQueryResponse("{}"));
+          }
+        };
     client = mock(NodeClient.class);
   }
 
@@ -101,6 +111,12 @@ public void testGetGrammar_BundleIsCached() throws Exception {
     AtomicInteger buildCount = new AtomicInteger(0);
     RestPPLGrammarAction countingAction =
         new RestPPLGrammarAction() {
+          @Override
+          protected void authorizeRequest(
+              NodeClient client, ActionListener<TransportPPLQueryResponse> listener) {
+            listener.onResponse(new TransportPPLQueryResponse("{}"));
+          }
+
           @Override
           protected GrammarBundle buildBundle() {
             buildCount.incrementAndGet();
@@ -128,6 +144,12 @@ public void testInvalidateCache() throws Exception {
     AtomicInteger buildCount = new AtomicInteger(0);
     RestPPLGrammarAction countingAction =
         new RestPPLGrammarAction() {
+          @Override
+          protected void authorizeRequest(
+              NodeClient client, ActionListener<TransportPPLQueryResponse> listener) {
+            listener.onResponse(new TransportPPLQueryResponse("{}"));
+          }
+
           @Override
           protected GrammarBundle buildBundle() {
             buildCount.incrementAndGet();
@@ -154,6 +176,12 @@ protected GrammarBundle buildBundle() {
   public void testGetGrammar_ErrorPath_Returns500() throws Exception {
     RestPPLGrammarAction failingAction =
         new RestPPLGrammarAction() {
+          @Override
+          protected void authorizeRequest(
+              NodeClient client, ActionListener<TransportPPLQueryResponse> listener) {
+            listener.onResponse(new TransportPPLQueryResponse("{}"));
+          }
+
           @Override
           protected GrammarBundle buildBundle() {
             throw new RuntimeException("simulated build failure");
@@ -171,6 +199,12 @@ protected GrammarBundle buildBundle() {
   public void testGetGrammar_NullBundle_Returns500() throws Exception {
     RestPPLGrammarAction nullBundleAction =
         new RestPPLGrammarAction() {
+          @Override
+          protected void authorizeRequest(
+              NodeClient client, ActionListener<TransportPPLQueryResponse> listener) {
+            listener.onResponse(new TransportPPLQueryResponse("{}"));
+          }
+
           @Override
           protected GrammarBundle buildBundle() {
             return null;
@@ -184,6 +218,24 @@ protected GrammarBundle buildBundle() {
     assertEquals(RestStatus.INTERNAL_SERVER_ERROR, channel.getResponse().status());
   }
 
+  @Test
+  public void testGetGrammar_AuthorizationFailure_Returns403() throws Exception {
+    RestPPLGrammarAction unauthorizedAction =
+        new RestPPLGrammarAction() {
+          @Override
+          protected void authorizeRequest(
+              NodeClient client, ActionListener<TransportPPLQueryResponse> listener) {
+            listener.onFailure(new OpenSearchStatusException("forbidden", RestStatus.FORBIDDEN));
+          }
+        };
+
+    FakeRestRequest request = newGrammarGetRequest();
+    MockRestChannel channel = new MockRestChannel(request, true);
+    unauthorizedAction.handleRequest(request, channel, client);
+
+    assertEquals(RestStatus.FORBIDDEN, channel.getResponse().status());
+  }
+
   private static FakeRestRequest newGrammarGetRequest() {
     return new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
         .withMethod(RestRequest.Method.GET)

From da140ef88298bff330aaec261ff07c5211270746 Mon Sep 17 00:00:00 2001
From: Eric Wei <mengwei.eric@gmail.com>
Date: Tue, 3 Mar 2026 14:00:29 -0800
Subject: [PATCH 20/20] Refactor PPL grammar bundle loading to static holder
 singleton

Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
---
 .../sql/plugin/rest/RestPPLGrammarAction.java | 25 ++------
 .../plugin/rest/RestPPLGrammarActionTest.java | 60 ++++---------------
 .../autocomplete/PPLGrammarBundleBuilder.java | 15 ++++-
 .../PPLGrammarBundleBuilderTest.java          |  8 ++-
 4 files changed, 33 insertions(+), 75 deletions(-)

diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
index 5c1b128572e..f10a3e30831 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -38,9 +38,6 @@ public class RestPPLGrammarAction extends BaseRestHandler {
 
   private static final String ENDPOINT_PATH = "/_plugins/_ppl/_grammar";
 
-  // Lazy-initialized singleton bundle (built once per JVM lifecycle)
-  private GrammarBundle cachedBundle;
-
   @Override
   public String getName() {
     return "ppl_grammar_action";
@@ -63,7 +60,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
               @Override
               public void onResponse(TransportPPLQueryResponse ignored) {
                 try {
-                  GrammarBundle bundle = getOrBuildBundle();
+                  GrammarBundle bundle = getBundle();
                   XContentBuilder builder = channel.newBuilder();
                   serializeBundle(builder, bundle);
                   channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder));
@@ -101,24 +98,10 @@ private void sendErrorResponse(RestChannel channel, Exception e) {
     }
   }
 
-  // Thread-safe lazy initialization.
-  private synchronized GrammarBundle getOrBuildBundle() {
-    if (cachedBundle == null) {
-      cachedBundle = buildBundle();
-    }
-    return cachedBundle;
-  }
-
-  /** Constructs the grammar bundle. Override in tests to inject a custom or failing builder. */
-  @VisibleForTesting
-  protected GrammarBundle buildBundle() {
-    return new PPLGrammarBundleBuilder().build();
-  }
-
-  /** Invalidate the cached bundle, forcing a rebuild on the next request. */
+  /** Gets the grammar bundle. Override in tests to inject a custom or failing bundle provider. */
   @VisibleForTesting
-  protected synchronized void invalidateCache() {
-    cachedBundle = null;
+  protected GrammarBundle getBundle() {
+    return PPLGrammarBundleBuilder.getBundle();
   }
 
   private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) throws IOException {
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
index 92d56710c08..2eee8750360 100644
--- a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
+++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -11,7 +11,6 @@
 import static org.mockito.Mockito.mock;
 
 import java.io.IOException;
-import java.util.concurrent.atomic.AtomicInteger;
 import org.json.JSONObject;
 import org.junit.Before;
 import org.junit.Test;
@@ -107,9 +106,9 @@ public void testGetGrammar_ReturnsBundle() throws Exception {
   }
 
   @Test
-  public void testGetGrammar_BundleIsCached() throws Exception {
-    AtomicInteger buildCount = new AtomicInteger(0);
-    RestPPLGrammarAction countingAction =
+  public void testGetGrammar_UsesBundleProvider() throws Exception {
+    int[] calls = {0};
+    RestPPLGrammarAction providerAction =
         new RestPPLGrammarAction() {
           @Override
           protected void authorizeRequest(
@@ -118,58 +117,21 @@ protected void authorizeRequest(
           }
 
           @Override
-          protected GrammarBundle buildBundle() {
-            buildCount.incrementAndGet();
-            return super.buildBundle();
+          protected GrammarBundle getBundle() {
+            calls[0]++;
+            return super.getBundle();
           }
         };
 
     FakeRestRequest request1 = newGrammarGetRequest();
     MockRestChannel channel1 = new MockRestChannel(request1, true);
-    countingAction.handleRequest(request1, channel1, client);
+    providerAction.handleRequest(request1, channel1, client);
 
     FakeRestRequest request2 = newGrammarGetRequest();
     MockRestChannel channel2 = new MockRestChannel(request2, true);
-    countingAction.handleRequest(request2, channel2, client);
+    providerAction.handleRequest(request2, channel2, client);
 
-    assertEquals("Bundle should be built exactly once", 1, buildCount.get());
-    assertEquals(
-        "Consecutive requests should return identical content",
-        channel1.getResponse().content().utf8ToString(),
-        channel2.getResponse().content().utf8ToString());
-  }
-
-  @Test
-  public void testInvalidateCache() throws Exception {
-    AtomicInteger buildCount = new AtomicInteger(0);
-    RestPPLGrammarAction countingAction =
-        new RestPPLGrammarAction() {
-          @Override
-          protected void authorizeRequest(
-              NodeClient client, ActionListener<TransportPPLQueryResponse> listener) {
-            listener.onResponse(new TransportPPLQueryResponse("{}"));
-          }
-
-          @Override
-          protected GrammarBundle buildBundle() {
-            buildCount.incrementAndGet();
-            return super.buildBundle();
-          }
-        };
-
-    FakeRestRequest request1 = newGrammarGetRequest();
-    MockRestChannel channel1 = new MockRestChannel(request1, true);
-    countingAction.handleRequest(request1, channel1, client);
-    assertEquals(RestStatus.OK, channel1.getResponse().status());
-    assertEquals("Bundle should be built on first request", 1, buildCount.get());
-
-    countingAction.invalidateCache();
-
-    FakeRestRequest request2 = newGrammarGetRequest();
-    MockRestChannel channel2 = new MockRestChannel(request2, true);
-    countingAction.handleRequest(request2, channel2, client);
-    assertEquals(RestStatus.OK, channel2.getResponse().status());
-    assertEquals("Bundle should be rebuilt after cache invalidation", 2, buildCount.get());
+    assertEquals("Bundle provider should be invoked once per request", 2, calls[0]);
   }
 
   @Test
@@ -183,7 +145,7 @@ protected void authorizeRequest(
           }
 
           @Override
-          protected GrammarBundle buildBundle() {
+          protected GrammarBundle getBundle() {
             throw new RuntimeException("simulated build failure");
           }
         };
@@ -206,7 +168,7 @@ protected void authorizeRequest(
           }
 
           @Override
-          protected GrammarBundle buildBundle() {
+          protected GrammarBundle getBundle() {
             return null;
           }
         };
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
index 32d671b7e67..cd992c740a9 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -23,7 +23,7 @@
 import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser;
 
 /** Builds the {@link GrammarBundle} for the PPL language from the generated ANTLR lexer/parser. */
-public class PPLGrammarBundleBuilder {
+public final class PPLGrammarBundleBuilder {
   private static final String ANTLR_VERSION =
       org.antlr.v4.runtime.RuntimeMetaData.getRuntimeVersion();
   private static final String BUNDLE_VERSION = "1.0";
@@ -44,7 +44,18 @@ public class PPLGrammarBundleBuilder {
               "BLOCK_COMMENT",
               "ERROR_RECOGNITION"));
 
-  public GrammarBundle build() {
+  private PPLGrammarBundleBuilder() {}
+
+  private static class BundleHolder {
+    private static final GrammarBundle INSTANCE = build();
+  }
+
+  /** Lazily builds and returns the singleton grammar bundle for this JVM. */
+  public static GrammarBundle getBundle() {
+    return BundleHolder.INSTANCE;
+  }
+
+  private static GrammarBundle build() {
     OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString(""));
     CommonTokenStream tokens = new CommonTokenStream(lexer);
     OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens);
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
index 590fbe76507..3792d98555c 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
@@ -8,6 +8,7 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -43,7 +44,7 @@ public class PPLGrammarBundleBuilderTest {
 
   @BeforeClass
   public static void buildBundle() {
-    bundle = new PPLGrammarBundleBuilder().build();
+    bundle = PPLGrammarBundleBuilder.getBundle();
   }
 
   @Test
@@ -140,9 +141,10 @@ public void testBuildVocabularyIsNonEmpty() {
 
   @Test
   public void testBuildIsDeterministic() {
-    GrammarBundle second = new PPLGrammarBundleBuilder().build();
+    GrammarBundle second = PPLGrammarBundleBuilder.getBundle();
+    assertSame("Bundle accessor should return the same singleton instance", bundle, second);
     assertEquals(
-        "Two builds of the same grammar should produce the same hash",
+        "Repeated accesses should produce the same hash",
         bundle.getGrammarHash(),
         second.getGrammarHash());
   }