diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index 6ad935e59da..462f09c6c90 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -45,6 +45,32 @@ public class CalcitePlanContext { private static final ThreadLocal legacyPreferredFlag = ThreadLocal.withInitial(() -> true); + /** + * Thread-local extra search-source JSON from the PPL request body. Set by PPLService before query + * execution and read by AbstractCalciteIndexScan when building the OpenSearch request. The JSON + * string is parsed via {@code SearchSourceBuilder.fromXContent()} and selectively merged into the + * index scan request. + */ + private static final ThreadLocal extraSearchSource = new ThreadLocal<>(); + + public static void setExtraSearchSource(String json) { + extraSearchSource.set(json); + } + + public static String getExtraSearchSource() { + return extraSearchSource.get(); + } + + public static void clearExtraSearchSource() { + extraSearchSource.remove(); + } + + /** Convenience check: does the extra search source contain a highlight clause? */ + public static boolean hasHighlightInExtraSearchSource() { + String extra = extraSearchSource.get(); + return extra != null && extra.contains("\"highlight\""); + } + @Getter @Setter private boolean isResolvingJoinCondition = false; @Getter @Setter private boolean isResolvingSubquery = false; @Getter @Setter private boolean inCoalesceFunction = false; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 84386dd0084..d59442150f4 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -23,6 +23,7 @@ import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation; import static org.opensearch.sql.calcite.utils.PlanUtils.getRexCall; import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME; import com.google.common.base.Strings; @@ -407,6 +408,15 @@ public RelNode visitProject(Project node, CalcitePlanContext context) { if (!context.isResolvingSubquery()) { context.setProjectVisited(true); } + // When highlight is active, include _highlight in the projection so it survives + // through the Calcite pipeline. This matches DSL behavior where _source filtering + // does not affect highlights. + if (CalcitePlanContext.hasHighlightInExtraSearchSource()) { + int hlIndex = currentFields.indexOf(HIGHLIGHT_FIELD); + if (hlIndex >= 0) { + expandedFields.add(context.relBuilder.field(hlIndex)); + } + } context.relBuilder.project(expandedFields); } return context.relBuilder.peek(); diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java b/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java index e470d12507e..35e922cb647 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java @@ -7,6 +7,7 @@ import lombok.Getter; import lombok.RequiredArgsConstructor; +import lombok.Setter; import org.opensearch.sql.ast.statement.ExplainMode; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.ExecutionEngine; @@ -22,6 +23,15 @@ public abstract class AbstractPlan { @Getter protected final QueryType queryType; + /** + * Extra search-source JSON from the PPL request body. Set by PPLService before submitting the + * plan to the query manager. The plan carries this across the thread boundary (REST handler + * thread → sql-worker thread), and the worker thread sets it as a ThreadLocal before Calcite + * planning and execution begin. The JSON is later parsed via {@code + * SearchSourceBuilder.fromXContent()} and selectively merged into the index scan request. + */ + @Getter @Setter private String extraSearchSource; + /** Start query execution. */ public abstract void execute(); diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java b/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java index 27f7a47e504..95ac07ae8cc 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java @@ -6,6 +6,7 @@ package org.opensearch.sql.executor.execution; import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.QueryId; @@ -34,7 +35,19 @@ public ExplainPlan( @Override public void execute() { - plan.explain(explainListener, mode); + setExtraSearchSourceThreadLocal(); + try { + plan.explain(explainListener, mode); + } finally { + CalcitePlanContext.clearExtraSearchSource(); + } + } + + private void setExtraSearchSourceThreadLocal() { + String extra = getExtraSearchSource(); + if (extra != null) { + CalcitePlanContext.setExtraSearchSource(extra); + } } @Override diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java b/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java index 5896871f5d0..4d1667df57a 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java @@ -10,6 +10,7 @@ import org.opensearch.sql.ast.statement.ExplainMode; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.QueryId; @@ -60,10 +61,15 @@ public QueryPlan( @Override public void execute() { - if (pageSize.isPresent()) { - queryService.execute(new Paginate(pageSize.get(), plan), getQueryType(), listener); - } else { - queryService.execute(plan, getQueryType(), listener); + setExtraSearchSourceThreadLocal(); + try { + if (pageSize.isPresent()) { + queryService.execute(new Paginate(pageSize.get(), plan), getQueryType(), listener); + } else { + queryService.execute(plan, getQueryType(), listener); + } + } finally { + CalcitePlanContext.clearExtraSearchSource(); } } @@ -78,4 +84,11 @@ public void explain( queryService.explain(plan, getQueryType(), listener, mode); } } + + private void setExtraSearchSourceThreadLocal() { + String extra = getExtraSearchSource(); + if (extra != null) { + CalcitePlanContext.setExtraSearchSource(extra); + } + } } diff --git a/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java b/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java index 79cc07f048b..6a6ad43ea49 100644 --- a/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java @@ -23,6 +23,9 @@ /** Highlight Expression. */ @Getter public class HighlightExpression extends FunctionExpression { + /** The field name used to store highlight data on ExprTupleValue rows. */ + public static final String HIGHLIGHT_FIELD = "_highlight"; + private final Expression highlightField; private final ExprType type; @@ -46,7 +49,7 @@ public HighlightExpression(Expression highlightField) { */ @Override public ExprValue valueOf(Environment valueEnv) { - String refName = "_highlight"; + String refName = HIGHLIGHT_FIELD; // Not a wilcard expression if (this.type == ExprCoreType.ARRAY) { refName += "." + StringUtils.unquoteText(getHighlightField().toString()); diff --git a/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java b/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java index 4cb5c755d14..bba5217051e 100644 --- a/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java @@ -6,17 +6,21 @@ package org.opensearch.sql.executor.execution; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import java.util.concurrent.atomic.AtomicReference; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.QueryId; @@ -56,4 +60,36 @@ public void explainThrowException() { }); assertEquals("explain query can not been explained.", unsupportedExplainException.getMessage()); } + + @Test + public void execute_sets_extra_search_source_threadlocal_for_explain() { + String extraSearchSource = "{\"highlight\":{\"fields\":{\"*\":{}}}}"; + AtomicReference captured = new AtomicReference<>(); + + doAnswer( + invocation -> { + captured.set(CalcitePlanContext.getExtraSearchSource()); + return null; + }) + .when(queryPlan) + .explain(any(), any()); + + ExplainPlan explainPlan = new ExplainPlan(queryId, queryType, queryPlan, mode, explainListener); + explainPlan.setExtraSearchSource(extraSearchSource); + explainPlan.execute(); + + assertEquals(extraSearchSource, captured.get()); + } + + @Test + public void execute_clears_extra_search_source_threadlocal_after_explain() { + String extraSearchSource = "{\"highlight\":{\"fields\":{\"*\":{}}}}"; + doNothing().when(queryPlan).explain(any(), any()); + + ExplainPlan explainPlan = new ExplainPlan(queryId, queryType, queryPlan, mode, explainListener); + explainPlan.setExtraSearchSource(extraSearchSource); + explainPlan.execute(); + + assertNull(CalcitePlanContext.getExtraSearchSource()); + } } diff --git a/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java b/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java index 77deb9b6a48..edbf95996e5 100644 --- a/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java @@ -5,14 +5,18 @@ package org.opensearch.sql.executor.execution; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.lang3.NotImplementedException; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; @@ -22,6 +26,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.ast.statement.ExplainMode; import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.DefaultExecutionEngine; import org.opensearch.sql.executor.ExecutionEngine; @@ -126,4 +131,35 @@ public void onFailure(Exception e) { }, mode); } + + @Test + public void execute_sets_extra_search_source_threadlocal() { + String extraSearchSource = "{\"highlight\":{\"fields\":{\"*\":{}}}}"; + AtomicReference captured = new AtomicReference<>(); + + doAnswer( + invocation -> { + captured.set(CalcitePlanContext.getExtraSearchSource()); + return null; + }) + .when(queryService) + .execute(any(), any(), any()); + + QueryPlan query = new QueryPlan(queryId, queryType, plan, queryService, queryListener); + query.setExtraSearchSource(extraSearchSource); + query.execute(); + + assertEquals(extraSearchSource, captured.get()); + } + + @Test + public void execute_clears_extra_search_source_threadlocal_after_execution() { + String extraSearchSource = "{\"highlight\":{\"fields\":{\"*\":{}}}}"; + + QueryPlan query = new QueryPlan(queryId, queryType, plan, queryService, queryListener); + query.setExtraSearchSource(extraSearchSource); + query.execute(); + + assertNull(CalcitePlanContext.getExtraSearchSource()); + } } diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index d5958ba3250..dc64568bcc1 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -53,7 +53,152 @@ Expected output: } ``` -## Explain +## Highlight + +### Description + +You can include an optional `highlight` object in the request body to request search result highlighting. When a PPL query contains a full-text search (e.g., `search source=logs "error"`), OpenSearch identifies where the search terms appear in document fields and returns the matching fragments wrapped in the specified tags. + +The `highlight` object is passed as-is to OpenSearch. See the [OpenSearch highlighting documentation](https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/) for all supported parameters. + +### What gets highlighted + +- **Full-text search terms only.** The search term in `search source=logs "error"` is translated to a `query_string` query, and OpenSearch's highlighter identifies matches. Structured filters (`where`, `stats`, comparison operators) do not produce highlights. +- **`text` and `keyword` fields only.** Numeric, date, boolean, and other non-string field types never produce highlight fragments. + +### Example 1: Wildcard highlight + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl \ +-d '{ + "query": "search source=accounts \\\"Street\\\" | fields firstname, address", + "highlight": { + "fields": { "*": {} }, + "pre_tags": [""], + "post_tags": [""], + "fragment_size": 2147483647 + } +}' +``` + +Expected output: + +```json +{ + "schema": [ + { + "name": "firstname", + "type": "string" + }, + { + "name": "address", + "type": "string" + } + ], + "datarows": [ + [ + "Hattie", + "671 Bristol Street" + ], + [ + "Nanette", + "789 Madison Street" + ] + ], + "highlights": [ + { + "address": [ + "671 Bristol Street" + ] + }, + { + "address": [ + "789 Madison Street" + ] + } + ], + "total": 2, + "size": 2 +} +``` + +### Example 2: Specific field with custom tags + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl \ +-d '{ + "query": "search source=accounts \\\"Street\\\" | fields firstname, address", + "highlight": { + "fields": { "address": {} }, + "pre_tags": [""], + "post_tags": [""] + } +}' +``` + +Expected output: + +```json +{ + "schema": [ + { + "name": "firstname", + "type": "string" + }, + { + "name": "address", + "type": "string" + } + ], + "datarows": [ + [ + "Hattie", + "671 Bristol Street" + ], + [ + "Nanette", + "789 Madison Street" + ] + ], + "highlights": [ + { + "address": [ + "671 Bristol Street" + ] + }, + { + "address": [ + "789 Madison Street" + ] + } + ], + "total": 2, + "size": 2 +} +``` + +Only the `address` field is highlighted. The `` custom tags are used instead of the default `` tags. + +### Response format + +- The `highlights` array is parallel to `datarows` — each entry corresponds to the row at the same index. +- Entries are `null` when a row has no highlight data for the requested fields. +- The `highlights` array is **omitted entirely** when no `highlight` config is provided in the request (backward compatible). When no highlight config is provided, there is zero impact on query processing — no extra columns, no extra overhead. + +### Behavior with piped commands + +- Piped commands (`where`, `sort`, `head`, `dedup`) narrow or reorder the result set but do not affect which terms are highlighted. Only the full-text search term produces highlights. +- Explicit field projection (`| fields name, age`) preserves highlight data, consistent with DSL where `_source` filtering does not affect highlights. + +### Limitations + +- Highlighting is supported only in the Calcite engine. +- The backend forwards the highlight config as-is to OpenSearch. The same highlighting behavior and limitations as [OpenSearch's highlighting API](https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/) apply. +- Highlighting works with **single-source queries only**, consistent with DSL where highlighting is inherently single-index per request. Behavior with joins (`| join`), subqueries, and multi-source queries is not validated. + +## Explain ### Description diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java new file mode 100644 index 00000000000..617123c45ba --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java @@ -0,0 +1,182 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; + +import java.io.IOException; +import java.util.Locale; +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalcitePPLHighlightIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.ACCOUNT); + } + + @Test + public void testHighlightWildcardFieldsPresent() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\"", + "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}"); + + assertTrue(result.has("highlights")); + JSONArray highlights = result.getJSONArray("highlights"); + assertEquals(result.getInt("size"), highlights.length()); + + boolean foundHighlight = false; + for (int i = 0; i < highlights.length(); i++) { + if (!highlights.isNull(i)) { + JSONObject hl = highlights.getJSONObject(i); + for (String key : hl.keySet()) { + String fragment = hl.getJSONArray(key).getString(0); + if (fragment.contains("") && fragment.contains("Street")) { + foundHighlight = true; + break; + } + } + if (foundHighlight) break; + } + } + assertTrue("Expected at least one highlight with tags", foundHighlight); + } + + @Test + public void testHighlightSpecificFieldOnly() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\"", + "{\"fields\": {\"address\": {}}, \"pre_tags\": [\"\"], \"post_tags\":" + + " [\"\"]}"); + + assertTrue(result.has("highlights")); + JSONArray highlights = result.getJSONArray("highlights"); + + boolean foundAddressHighlight = false; + for (int i = 0; i < highlights.length(); i++) { + if (!highlights.isNull(i)) { + JSONObject hl = highlights.getJSONObject(i); + // Only address field should be highlighted, not other text fields + assertFalse("Should not highlight firstname field", hl.has("firstname")); + if (hl.has("address")) { + String fragment = hl.getJSONArray("address").getString(0); + assertTrue("address highlight should contain tags", fragment.contains("")); + assertTrue("address highlight should contain Street", fragment.contains("Street")); + foundAddressHighlight = true; + } + } + } + assertTrue("Expected at least one row with address highlighted", foundAddressHighlight); + } + + @Test + public void testHighlightCustomTagsApplied() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\"", + "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\":" + + " [\"\"]}"); + + assertTrue(result.has("highlights")); + JSONArray highlights = result.getJSONArray("highlights"); + + boolean foundCustomTag = false; + for (int i = 0; i < highlights.length(); i++) { + if (!highlights.isNull(i)) { + JSONObject hl = highlights.getJSONObject(i); + for (String key : hl.keySet()) { + String fragment = hl.getJSONArray(key).getString(0); + if (fragment.contains("")) { + foundCustomTag = true; + break; + } + } + if (foundCustomTag) break; + } + } + assertTrue("Expected custom tags in highlights", foundCustomTag); + } + + @Test + public void testHighlightOmittedWhenNotRequested() throws IOException { + JSONObject result = + executeQueryNoHighlight("search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\""); + + assertFalse("Should not have highlights when not requested", result.has("highlights")); + } + + @Test + public void testHighlightAlignedAfterPipedFilter() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\" | where age > 30", + "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}"); + + assertTrue(result.has("highlights")); + assertTrue(result.getInt("size") > 0); + JSONArray highlights = result.getJSONArray("highlights"); + assertEquals(result.getInt("size"), highlights.length()); + } + + @Test + public void testExplainContainsHighlightClause() throws IOException { + Request request = new Request("POST", "/_plugins/_ppl/_explain"); + request.setJsonEntity( + String.format( + Locale.ROOT, + "{\"query\": \"search source=%s \\\"Street\\\"\"," + + "\"highlight\": {\"fields\": {\"*\": {}}," + + "\"pre_tags\": [\"\"], \"post_tags\": [\"\"]}}", + TEST_INDEX_ACCOUNT)); + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + Response response = client().performRequest(request); + assertEquals(200, response.getStatusLine().getStatusCode()); + + String body = org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true); + assertTrue("Explain should contain highlight", body.contains("highlight")); + } + + private JSONObject executeQueryWithHighlight(String query, String highlightJson) + throws IOException { + Request request = new Request("POST", "/_plugins/_ppl"); + request.setJsonEntity( + String.format(Locale.ROOT, "{\"query\": \"%s\", \"highlight\": %s}", query, highlightJson)); + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + Response response = client().performRequest(request); + assertEquals(200, response.getStatusLine().getStatusCode()); + String body = org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true); + return new JSONObject(body); + } + + private JSONObject executeQueryNoHighlight(String query) throws IOException { + Request request = new Request("POST", "/_plugins/_ppl"); + request.setJsonEntity(String.format(Locale.ROOT, "{\"query\": \"%s\"}", query)); + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + Response response = client().performRequest(request); + assertEquals(200, response.getStatusLine().getStatusCode()); + String body = org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true); + return new JSONObject(body); + } +} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml new file mode 100644 index 00000000000..63d102fbfae --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml @@ -0,0 +1,108 @@ +setup: + - do: + indices.create: + index: ppl_highlight_test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + message: + type: text + status: + type: text + code: + type: integer + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + + - do: + bulk: + index: ppl_highlight_test + refresh: true + body: + - '{"index": {}}' + - '{"message": "Connection error occurred", "status": "error response", "code": 500}' + - '{"index": {}}' + - '{"message": "Request completed successfully", "status": "ok", "code": 200}' + - '{"index": {}}' + - '{"message": "Timeout error in service", "status": "error timeout", "code": 504}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"PPL search with wildcard highlight": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'search source=ppl_highlight_test "error" | sort code' + highlight: + fields: + "*": {} + pre_tags: + - "" + post_tags: + - "" + + - match: {"size": 2} + - length: {highlights: 2} + - match: {highlights.0.message.0: "Connection error occurred"} + - match: {highlights.0.status.0: "error response"} + - match: {highlights.1.message.0: "Timeout error in service"} + - match: {highlights.1.status.0: "error timeout"} + +--- +"PPL search without highlight is backward compatible": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'search source=ppl_highlight_test "error"' + + - match: {"size": 2} + - is_false: highlights + +--- +"PPL search with custom tags": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'search source=ppl_highlight_test "error" | sort code' + highlight: + fields: + "*": {} + pre_tags: + - "" + post_tags: + - "" + + - match: {"size": 2} + - length: {highlights: 2} + - match: {highlights.0.message.0: "Connection error occurred"} + - match: {highlights.0.status.0: "error response"} + - match: {highlights.1.message.0: "Timeout error in service"} + - match: {highlights.1.status.0: "error timeout"} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index e3a47337162..78da41f74a5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -52,6 +52,7 @@ import org.opensearch.sql.executor.ExecutionEngine.Schema.Column; import org.opensearch.sql.executor.Explain; import org.opensearch.sql.executor.pagination.PlanSerializer; +import org.opensearch.sql.expression.HighlightExpression; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; import org.opensearch.sql.monitor.profile.MetricName; @@ -266,10 +267,19 @@ private QueryResponse buildResultSet( List values = new ArrayList<>(); // Iterate through the ResultSet while (resultSet.next() && (querySizeLimit == null || values.size() < querySizeLimit)) { - Map row = new LinkedHashMap(); + Map row = new LinkedHashMap<>(); // Loop through each column for (int i = 1; i <= columnCount; i++) { String columnName = metaData.getColumnName(i); + // _highlight flows through the Calcite pipeline as a hidden column (SqlTypeName.ANY). + // Extract it as an opaque ExprValue and embed it in the row tuple directly. + if (HighlightExpression.HIGHLIGHT_FIELD.equals(columnName)) { + Object value = resultSet.getObject(i); + if (value instanceof ExprValue hl && !hl.isMissing()) { + row.put(HighlightExpression.HIGHLIGHT_FIELD, hl); + } + continue; + } Object value = resultSet.getObject(columnName); Object converted = processValue(value); ExprValue exprValue = ExprValueUtils.fromObjectValue(converted); @@ -281,6 +291,10 @@ private QueryResponse buildResultSet( List columns = new ArrayList<>(metaData.getColumnCount()); for (int i = 1; i <= columnCount; ++i) { String columnName = metaData.getColumnName(i); + // Exclude _highlight from the response schema — it's a hidden column. + if (HighlightExpression.HIGHLIGHT_FIELD.equals(columnName)) { + continue; + } RelDataType fieldType = fieldTypes.get(i - 1); // TODO: Correct this after fixing issue github.com/opensearch-project/sql/issues/3751 // The element type of struct and array is currently set to ANY. @@ -299,8 +313,7 @@ private QueryResponse buildResultSet( columns.add(new Column(columnName, null, exprType)); } Schema schema = new Schema(columns); - QueryResponse response = new QueryResponse(schema, values, null); - return response; + return new QueryResponse(schema, values, null); } /** Registers opensearch-dependent functions */ diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index 0a47dc64a5e..1422be8ab45 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -5,6 +5,7 @@ package org.opensearch.sql.opensearch.response; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATAFIELD_TYPE_MAP; import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_ID; import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_INDEX; @@ -200,7 +201,7 @@ private void addHighlightsToBuilder( .map(Text::toString) .collect(Collectors.toList()))); } - builder.put("_highlight", ExprTupleValue.fromExprValueMap(hlBuilder.build())); + builder.put(HIGHLIGHT_FIELD, ExprTupleValue.fromExprValueMap(hlBuilder.build())); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 3ab40caee27..9028e80b18b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -5,7 +5,9 @@ package org.opensearch.sql.opensearch.storage.scan; +import static java.util.Collections.emptyList; import static java.util.Objects.requireNonNull; +import static org.opensearch.core.xcontent.DeprecationHandler.IGNORE_DEPRECATIONS; import static org.opensearch.sql.common.setting.Settings.Key.CALCITE_PUSHDOWN_ROWCOUNT_ESTIMATION_FACTOR; import static org.opensearch.sql.opensearch.request.PredicateAnalyzer.ScriptQueryExpression.getScriptSortType; import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.MISSING_MAX; @@ -45,15 +47,23 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.search.SearchModule; +import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.sort.ScoreSortBuilder; import org.opensearch.search.sort.ScriptSortBuilder.ScriptSortType; import org.opensearch.search.sort.SortBuilder; import org.opensearch.search.sort.SortBuilders; import org.opensearch.search.sort.SortOrder; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.plan.AliasFieldsWrappable; import org.opensearch.sql.common.setting.Settings.Key; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.opensearch.request.PredicateAnalyzer; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; import org.opensearch.sql.opensearch.storage.scan.context.AbstractAction; @@ -72,6 +82,9 @@ @Getter public abstract class AbstractCalciteIndexScan extends TableScan implements AliasFieldsWrappable { private static final Logger LOG = LogManager.getLogger(AbstractCalciteIndexScan.class); + + private static final NamedXContentRegistry X_CONTENT_REGISTRY = + new NamedXContentRegistry(new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents()); public final OpenSearchIndex osIndex; // The schema of this scan operator, it's initialized with the row type of the table, but may be // changed by push down operations. @@ -106,13 +119,39 @@ public RelDataType deriveRowType() { public RelWriter explainTerms(RelWriter pw) { String explainString = String.valueOf(pushDownContext); if (pw instanceof RelWriterImpl) { - // Only add request builder to the explain plan - explainString += ", " + pushDownContext.createRequestBuilder(); + OpenSearchRequestBuilder requestBuilder = pushDownContext.createRequestBuilder(); + applyExtraSearchSource(requestBuilder); + explainString += ", " + requestBuilder; } return super.explainTerms(pw) .itemIf("PushDownContext", explainString, !pushDownContext.isEmpty()); } + /** + * Apply extra search-source JSON from the ThreadLocal to the OpenSearch request builder. The JSON + * is parsed via {@code SearchSourceBuilder.fromXContent()} and any recognized clauses (highlight, + * suggest, rescore, etc.) are selectively merged into the target request. + * + * @param requestBuilder the OpenSearch request builder to merge extra clauses into + */ + protected static void applyExtraSearchSource(OpenSearchRequestBuilder requestBuilder) { + String json = CalcitePlanContext.getExtraSearchSource(); + if (json == null) { + return; + } + try { + XContentParser parser = + XContentType.JSON.xContent().createParser(X_CONTENT_REGISTRY, IGNORE_DEPRECATIONS, json); + SearchSourceBuilder extra = SearchSourceBuilder.fromXContent(parser); + SearchSourceBuilder target = requestBuilder.getSourceBuilder(); + if (extra.highlighter() != null) { + target.highlighter(extra.highlighter()); + } + } catch (Exception e) { + LOG.warn("Failed to parse extra search source JSON, skipping: {}", e.getMessage()); + } + } + protected Integer getQuerySizeLimit() { return osIndex.getSettings().getSettingValue(Key.QUERY_SIZE_LIMIT); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index 7ba75e46ba0..0949c458084 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -119,6 +119,7 @@ public Result implement(EnumerableRelImplementor implementor, Prefer pref) { @Override public Enumerator enumerator() { OpenSearchRequestBuilder requestBuilder = pushDownContext.createRequestBuilder(); + applyExtraSearchSource(requestBuilder); return new OpenSearchIndexEnumerator( osIndex.getClient(), getRowType().getFieldNames(), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index dbe8306d4b2..7c0d1d629a1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -5,6 +5,8 @@ package org.opensearch.sql.opensearch.storage.scan; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; + import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.List; @@ -35,11 +37,13 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.common.setting.Settings; @@ -78,10 +82,26 @@ public CalciteLogicalIndexScan( ImmutableList.of(), table, osIndex, - table.getRowType(), + buildInitialSchema(cluster, table), new PushDownContext(osIndex)); } + /** + * Build the initial schema for the scan, conditionally adding a hidden {@code _highlight} column + * when highlight configuration is present. This allows highlight data to flow through all Calcite + * operators (filter, sort, dedup) as a regular column, avoiding positional misalignment issues. + */ + private static RelDataType buildInitialSchema(RelOptCluster cluster, RelOptTable table) { + RelDataType base = table.getRowType(); + if (!CalcitePlanContext.hasHighlightInExtraSearchSource()) { + return base; + } + RelDataTypeFactory.Builder builder = cluster.getTypeFactory().builder(); + builder.addAll(base.getFieldList()); + builder.add(HIGHLIGHT_FIELD, SqlTypeName.ANY); + return builder.build(); + } + protected CalciteLogicalIndexScan( RelOptCluster cluster, RelTraitSet traitSet, @@ -276,10 +296,13 @@ public CalciteLogicalIndexScan pushDownProject(List selectedColumns) { // For aggregate, we do nothing on query builder but only change the schema of the scan. action = (AggregationBuilderAction) aggAction -> {}; } else { + // Filter _highlight from _source includes — it's not a source field in OpenSearch. action = (OSRequestBuilderAction) requestBuilder -> - requestBuilder.pushDownProjectStream(newSchema.getFieldNames().stream()); + requestBuilder.pushDownProjectStream( + newSchema.getFieldNames().stream() + .filter(name -> !HIGHLIGHT_FIELD.equals(name))); } newScan.pushDownContext.add( PushDownType.PROJECT, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java index 05bd00dcf2c..d8bcfe6b280 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java @@ -5,9 +5,12 @@ package org.opensearch.sql.opensearch.storage.scan; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; + import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Map; import lombok.EqualsAndHashCode; import lombok.ToString; import org.apache.calcite.linq4j.Enumerator; @@ -88,7 +91,19 @@ public Object current() { if (fields.size() == 1) { return resolveForCalcite(current, fields.getFirst()); } - return fields.stream().map(field -> resolveForCalcite(current, field)).toArray(); + return fields.stream() + .map( + field -> { + // _highlight is carried as an opaque ExprValue through the Calcite row pipeline. + // It uses SqlTypeName.ANY so Calcite passes it through without conversion. + if (HIGHLIGHT_FIELD.equals(field)) { + Map tuple = ExprValueUtils.getTupleValue(current); + ExprValue hl = tuple.get(HIGHLIGHT_FIELD); + return (hl != null && !hl.isMissing()) ? (Object) hl : null; + } + return resolveForCalcite(current, field); + }) + .toArray(); } private Object resolveForCalcite(ExprValue value, String rawPath) { diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanExtraSearchSourceTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanExtraSearchSourceTest.java new file mode 100644 index 00000000000..ad0b5e4171d --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanExtraSearchSourceTest.java @@ -0,0 +1,100 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.mockito.Mockito.when; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; + +@ExtendWith(MockitoExtension.class) +class AbstractCalciteIndexScanExtraSearchSourceTest { + + @Mock private OpenSearchRequestBuilder requestBuilder; + + @AfterEach + void cleanup() { + CalcitePlanContext.clearExtraSearchSource(); + } + + @Test + void applyExtraSearchSource_withNullConfig_doesNothing() { + // No ThreadLocal set — config is null, requestBuilder should not be called + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); + // If config is null, method returns early — no interaction with requestBuilder + } + + @Test + void applyExtraSearchSource_withWildcardFields_setsHighlighter() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); + + CalcitePlanContext.setExtraSearchSource("{\"highlight\":{\"fields\":{\"*\":{}}}}"); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); + + HighlightBuilder highlighter = sourceBuilder.highlighter(); + assertNotNull(highlighter); + assertEquals(1, highlighter.fields().size()); + assertEquals("*", highlighter.fields().get(0).name()); + } + + @Test + void applyExtraSearchSource_withCustomTags_setsTags() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); + + CalcitePlanContext.setExtraSearchSource( + "{\"highlight\":{\"fields\":{\"message\":{}},\"pre_tags\":[\"\"],\"post_tags\":[\"\"]}}"); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); + + HighlightBuilder highlighter = sourceBuilder.highlighter(); + assertArrayEquals(new String[] {""}, highlighter.preTags()); + assertArrayEquals(new String[] {""}, highlighter.postTags()); + } + + @Test + void applyExtraSearchSource_withFragmentSize_setsOnHighlighter() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); + + CalcitePlanContext.setExtraSearchSource( + "{\"highlight\":{\"fields\":{\"*\":{}},\"fragment_size\":2147483647}}"); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); + + HighlightBuilder highlighter = sourceBuilder.highlighter(); + assertEquals(1, highlighter.fields().size()); + // fragment_size is parsed as a top-level HighlightBuilder setting by native XContent parsing + assertEquals(Integer.valueOf(2147483647), highlighter.fragmentSize()); + } + + @Test + void applyExtraSearchSource_withMalformedJson_handlesGracefully() { + // Malformed JSON — should not throw, just log warning and skip + CalcitePlanContext.setExtraSearchSource("{not valid json}}}"); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); + // No exception thrown — method handles gracefully + } + + @Test + void applyExtraSearchSource_withNoHighlight_doesNotSetHighlighter() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + // JSON with no highlight clause — nothing to merge + CalcitePlanContext.setExtraSearchSource("{\"size\":10}"); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); + assertNull(sourceBuilder.highlighter()); + } +} diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java index d6f025a4540..455eebc97a5 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java @@ -64,7 +64,9 @@ public void execute( ResponseListener queryListener, ResponseListener explainListener) { try { - queryManager.submit(plan(request, queryListener, explainListener)); + AbstractPlan plan = plan(request, queryListener, explainListener); + setExtraSearchSourceOnPlan(plan, request); + queryManager.submit(plan); } catch (Exception e) { queryListener.onFailure(e); } @@ -79,12 +81,25 @@ public void execute( */ public void explain(PPLQueryRequest request, ResponseListener listener) { try { - queryManager.submit(plan(request, NO_CONSUMER_RESPONSE_LISTENER, listener)); + AbstractPlan plan = plan(request, NO_CONSUMER_RESPONSE_LISTENER, listener); + setExtraSearchSourceOnPlan(plan, request); + queryManager.submit(plan); } catch (Exception e) { listener.onFailure(e); } } + /** + * Set extra search source on the plan so it can be carried across the thread boundary. The plan's + * execute() method will set the ThreadLocal on the worker thread. + */ + private void setExtraSearchSourceOnPlan(AbstractPlan plan, PPLQueryRequest request) { + String extra = request.getExtraSearchSource(); + if (extra != null) { + plan.setExtraSearchSource(extra); + } + } + private AbstractPlan plan( PPLQueryRequest request, ResponseListener queryListener, diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java index caf666b3b4e..c236474bc5c 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java @@ -109,4 +109,23 @@ public int getFetchSize() { } return jsonContent.optInt(FETCH_SIZE_FIELD, 0); } + + /** + * Get extra search-source-compatible JSON from the request body. Currently wraps the {@code + * highlight} field; future extensions (suggest, rescore, post_filter, etc.) can be added here. + * + * @return search-source JSON string, or null if no extra fields are present + */ + public String getExtraSearchSource() { + if (jsonContent == null) { + return null; + } + JSONObject highlight = jsonContent.optJSONObject("highlight"); + if (highlight == null) { + return null; + } + JSONObject wrapper = new JSONObject(); + wrapper.put("highlight", highlight); + return wrapper.toString(); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java index 9fe3a4fef64..c0ca166641d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java @@ -6,6 +6,8 @@ package org.opensearch.sql.ppl.domain; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import org.json.JSONObject; @@ -92,4 +94,35 @@ public void testGetFetchSizeWithLargeValue() { PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); assertEquals(15000, request.getFetchSize()); } + + @Test + public void testGetExtraSearchSourceReturnsHighlightWrapper() { + JSONObject json = new JSONObject(); + json.put("query", "source=t \"error\""); + json.put( + "highlight", + new JSONObject( + "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}")); + PPLQueryRequest request = new PPLQueryRequest("source=t \"error\"", json, "/_plugins/_ppl"); + String extra = request.getExtraSearchSource(); + assertNotNull(extra); + JSONObject parsed = new JSONObject(extra); + assertTrue(parsed.has("highlight")); + assertTrue(parsed.getJSONObject("highlight").has("fields")); + assertTrue(parsed.getJSONObject("highlight").has("pre_tags")); + assertTrue(parsed.getJSONObject("highlight").has("post_tags")); + } + + @Test + public void testGetExtraSearchSourceReturnsNullWhenNotSpecified() { + JSONObject json = new JSONObject("{\"query\": \"source=t\"}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + assertNull(request.getExtraSearchSource()); + } + + @Test + public void testGetExtraSearchSourceReturnsNullWhenJsonContentIsNull() { + PPLQueryRequest request = new PPLQueryRequest("source=t", null, "/_plugins/_ppl"); + assertNull(request.getExtraSearchSource()); + } } diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java index 53badf3950d..a026bddf46b 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java @@ -5,11 +5,15 @@ package org.opensearch.sql.protocol.response; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; + import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.stream.Collectors; import lombok.Getter; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; @@ -82,19 +86,48 @@ public Map columnNameTypes() { @Override public Iterator iterator() { - // Any chance to avoid copy for json response generation? return exprValues.stream() .map(ExprValueUtils::getTupleValue) - .map(Map::values) - .map(this::convertExprValuesToValues) + .map( + tuple -> + tuple.entrySet().stream() + .filter(e -> !HIGHLIGHT_FIELD.equals(e.getKey())) + .map(e -> e.getValue().value()) + .toArray(Object[]::new)) .iterator(); } - private String getColumnName(Column column) { - return (column.getAlias() != null) ? column.getAlias() : column.getName(); + /** + * Extract highlight data from each result row. Each row may contain a {@code _highlight} field + * added by {@code OpenSearchResponse.addHighlightsToBuilder()} and preserved through projection. + * Returns a list parallel to datarows where each entry is either a map of field name to highlight + * fragments, or null if no highlight data exists for that row. + * + * @return list of highlight maps, one per row + */ + public List> highlights() { + return exprValues.stream() + .map(ExprValueUtils::getTupleValue) + .map( + tuple -> { + ExprValue hl = tuple.get(HIGHLIGHT_FIELD); + if (hl == null || hl.isMissing() || hl.isNull()) { + return null; + } + Map hlMap = new LinkedHashMap<>(); + for (Map.Entry entry : hl.tupleValue().entrySet()) { + hlMap.put( + entry.getKey(), + entry.getValue().collectionValue().stream() + .map(ExprValue::stringValue) + .collect(Collectors.toList())); + } + return (Map) hlMap; + }) + .collect(Collectors.toList()); } - private Object[] convertExprValuesToValues(Collection exprValues) { - return exprValues.stream().map(ExprValue::value).toArray(Object[]::new); + private String getColumnName(Column column) { + return (column.getAlias() != null) ? column.getAlias() : column.getName(); } } diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatter.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatter.java index 8be22af5326..5c696035555 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatter.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatter.java @@ -6,6 +6,8 @@ package org.opensearch.sql.protocol.response.format; import java.util.List; +import java.util.Map; +import java.util.Objects; import lombok.Builder; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -38,6 +40,12 @@ protected Object buildJsonObject(QueryResult response) { response.getSchema().getColumns().forEach(col -> json.column(fetchColumn(col))); json.datarows(fetchDataRows(response)); + // Populate highlights if present + List> highlights = response.highlights(); + if (highlights.stream().anyMatch(Objects::nonNull)) { + json.highlights(highlights); + } + // Populate other fields json.total(response.size()).size(response.size()).status(200); if (!response.getCursor().equals(Cursor.None)) { @@ -88,6 +96,7 @@ public static class JdbcResponse { private final List schema; private final Object[][] datarows; + private final List> highlights; private final long total; private final long size; private final int status; diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java index ff59ce4cddc..5e57d8aacd6 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java @@ -6,6 +6,8 @@ package org.opensearch.sql.protocol.response.format; import java.util.List; +import java.util.Map; +import java.util.Objects; import lombok.Builder; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -54,6 +56,13 @@ public Object buildJsonObject(QueryResult response) { response.columnNameTypes().forEach((name, type) -> json.column(new Column(name, type))); json.datarows(fetchDataRows(response)); + + // Populate highlights if present + List> highlights = response.highlights(); + if (highlights.stream().anyMatch(Objects::nonNull)) { + json.highlights(highlights); + } + formatMetric.set(System.nanoTime() - formatTime); json.profile(QueryProfiling.current().finish()); @@ -79,6 +88,7 @@ public static class JsonResponse { private final List schema; private final Object[][] datarows; + private final List> highlights; private long total; private long size; diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java index fc3402e20a5..20e83d2f4a3 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.fail; import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; @@ -16,7 +17,11 @@ import com.google.common.collect.ImmutableMap; import java.util.Arrays; import java.util.Collections; +import java.util.List; +import java.util.Map; import org.junit.jupiter.api.Test; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.pagination.Cursor; @@ -106,4 +111,164 @@ void iterate() { i++; } } + + @Test + void testIterateExcludesHighlightFromDatarows() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("John"), + "age", + ExprValueUtils.integerValue(20), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.collectionValue( + ImmutableList.of("John"))))))), + Cursor.None); + + for (Object[] objects : response) { + // datarows should only have schema columns, not _highlight + assertArrayEquals(new Object[] {"John", 20}, objects); + } + } + + @Test + void testHighlightsReturnsSingleRowData() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("John"), + "age", + ExprValueUtils.integerValue(20), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.collectionValue( + ImmutableList.of("John"))))))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertEquals(ImmutableMap.of("name", ImmutableList.of("John")), highlights.get(0)); + } + + @Test + void testHighlightsReturnsNullWhenNoHighlightField() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList(tupleValue(ImmutableMap.of("name", "John", "age", 20))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertNull(highlights.get(0)); + } + + @Test + void testHighlightsReturnsNullWhenHighlightIsMissing() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("John"), + "age", + ExprValueUtils.integerValue(20), + "_highlight", + ExprValueUtils.LITERAL_MISSING))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertNull(highlights.get(0)); + } + + @Test + void testHighlightsReturnsEmptyListWhenNoRows() { + QueryResult response = new QueryResult(schema, Collections.emptyList(), Cursor.None); + + List> highlights = response.highlights(); + assertEquals(0, highlights.size()); + } + + @Test + void testHighlightsReturnsNullWhenHighlightIsNull() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("John"), + "age", + ExprValueUtils.integerValue(20), + "_highlight", + ExprValueUtils.LITERAL_NULL))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertNull(highlights.get(0)); + } + + @Test + void testHighlightsMultiRowAlignment() { + QueryResult response = + new QueryResult( + schema, + Arrays.asList( + // Row 0: has highlight on name + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("John"), + "age", + ExprValueUtils.integerValue(20), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.collectionValue( + ImmutableList.of("John")))))), + // Row 1: no highlight + tupleValue(ImmutableMap.of("name", "Allen", "age", 30)), + // Row 2: has highlight on age (as string field) + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("Smith"), + "age", + ExprValueUtils.integerValue(40), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.collectionValue( + ImmutableList.of("Smith"))))))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(3, highlights.size()); + // Row 0: highlight present + assertEquals(ImmutableMap.of("name", ImmutableList.of("John")), highlights.get(0)); + // Row 1: no highlight → null + assertNull(highlights.get(1)); + // Row 2: highlight present + assertEquals(ImmutableMap.of("name", ImmutableList.of("Smith")), highlights.get(2)); + } } diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatterTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatterTest.java index 16dd1590eea..b8347f32be7 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatterTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatterTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableMap; import com.google.gson.JsonParser; import java.util.Arrays; +import java.util.Collections; import java.util.Map; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; @@ -207,6 +208,55 @@ void format_server_error_response_due_to_opensearch() { "all shards failed", new IllegalStateException("Execution error")))); } + @Test + void format_response_with_highlights() { + QueryResult response = + new QueryResult( + new Schema( + ImmutableList.of( + new Column("name", null, STRING), new Column("age", null, INTEGER))), + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + stringValue("John"), + "age", + org.opensearch.sql.data.model.ExprValueUtils.integerValue(20), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + org.opensearch.sql.data.model.ExprValueUtils.collectionValue( + ImmutableList.of("John")))))))); + + assertJsonEquals( + "{" + + "\"schema\":[" + + "{\"name\":\"name\",\"type\":\"keyword\"}," + + "{\"name\":\"age\",\"type\":\"integer\"}" + + "]," + + "\"datarows\":[[\"John\",20]]," + + "\"highlights\":[{\"name\":[\"John\"]}]," + + "\"total\":1," + + "\"size\":1," + + "\"status\":200}", + formatter.format(response)); + } + + @Test + void format_response_without_highlights() { + QueryResult response = + new QueryResult( + new Schema( + ImmutableList.of( + new Column("name", null, STRING), new Column("age", null, INTEGER))), + Collections.singletonList(tupleValue(ImmutableMap.of("name", "John", "age", 20)))); + + // When no highlights, the highlights field should not appear in the JSON + String formatted = formatter.format(response); + assertEquals(false, formatted.contains("\"highlights\"")); + } + private static void assertJsonEquals(String expected, String actual) { assertEquals(JsonParser.parseString(expected), JsonParser.parseString(actual)); } diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java index e5eb0f1ac77..b89fb878f21 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java @@ -6,7 +6,11 @@ package org.opensearch.sql.protocol.response.format; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_MISSING; +import static org.opensearch.sql.data.model.ExprValueUtils.collectionValue; +import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; import static org.opensearch.sql.data.model.ExprValueUtils.stringValue; import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; @@ -17,6 +21,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Arrays; +import java.util.Collections; import org.junit.jupiter.api.Test; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.executor.ExecutionEngine; @@ -178,4 +183,38 @@ void formatErrorPretty() { + "}", formatter.format(new RuntimeException("This is an exception"))); } + + @Test + void testFormatResponseHighlightsPresent() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "firstname", + stringValue("John"), + "age", + integerValue(20), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "firstname", + collectionValue(ImmutableList.of("John")))))))); + SimpleJsonResponseFormatter formatter = new SimpleJsonResponseFormatter(COMPACT); + String result = formatter.format(response); + assertTrue(result.contains("\"highlights\"")); + assertTrue(result.contains("\"firstname\":[\"John\"]")); + } + + @Test + void testFormatResponseHighlightsOmittedWhenAbsent() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList(tupleValue(ImmutableMap.of("firstname", "John", "age", 20)))); + SimpleJsonResponseFormatter formatter = new SimpleJsonResponseFormatter(COMPACT); + String result = formatter.format(response); + assertFalse(result.contains("\"highlights\"")); + } }