From ac13a8a102d5f26c22dc90881cfa2b2b4f6552ac Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 12 Feb 2026 13:24:41 -0800 Subject: [PATCH 01/18] Calcite PPL search result highlighting Signed-off-by: Jialiang Liang --- .../sql/calcite/CalciteRelNodeVisitor.java | 5 ++ .../sql/calcite/utils/PPLHintUtils.java | 37 +++++++++- .../sql/expression/HighlightExpression.java | 5 +- .../executor/OpenSearchExecutionEngine.java | 18 +++++ .../response/OpenSearchResponse.java | 3 +- .../scan/AbstractCalciteIndexScan.java | 11 ++- .../storage/scan/CalciteLogicalIndexScan.java | 22 ++++++ .../scan/OpenSearchIndexEnumerator.java | 32 +++++++++ .../storage/scan/context/PushDownType.java | 4 +- .../sql/protocol/response/QueryResult.java | 47 +++++++++++-- .../format/JdbcResponseFormatter.java | 9 +++ .../format/SimpleJsonResponseFormatter.java | 10 +++ .../protocol/response/QueryResultTest.java | 69 +++++++++++++++++++ .../format/JdbcResponseFormatterTest.java | 50 ++++++++++++++ 14 files changed, 307 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 84386dd0084..9a1c0f2a880 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -227,6 +227,11 @@ private RelBuilder scan(RelOptTable tableSchema, CalcitePlanContext context) { public RelNode visitSearch(Search node, CalcitePlanContext context) { // Visit the Relation child to get the scan node.getChild().get(0).accept(this, context); + + // Mark the scan as originating from a search command so that the optimizer + // can scope auto-highlight injection to search queries only. + PPLHintUtils.markSearchCommand(context.relBuilder); + // Create query_string function Function queryStringFunc = AstDSL.function( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java index 915c45e7083..31d213b32d8 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java @@ -6,10 +6,14 @@ package org.opensearch.sql.calcite.utils; import com.google.common.base.Suppliers; +import java.util.List; import java.util.function.Supplier; import lombok.experimental.UtilityClass; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.hint.HintStrategyTable; +import org.apache.calcite.rel.hint.Hintable; import org.apache.calcite.rel.hint.RelHint; import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.tools.RelBuilder; @@ -19,6 +23,7 @@ public class PPLHintUtils { private static final String HINT_AGG_ARGUMENTS = "AGG_ARGS"; private static final String KEY_IGNORE_NULL_BUCKET = "ignoreNullBucket"; private static final String KEY_HAS_NESTED_AGG_CALL = "hasNestedAggCall"; + public static final String HINT_SEARCH_COMMAND = "SEARCH_COMMAND"; private static final Supplier HINT_STRATEGY_TABLE = Suppliers.memoize( @@ -29,7 +34,7 @@ public class PPLHintUtils { (hint, rel) -> { return rel instanceof LogicalAggregate; }) - // add more here + .hintStrategy(HINT_SEARCH_COMMAND, (hint, rel) -> rel instanceof TableScan) .build()); /** @@ -81,4 +86,34 @@ public static boolean hasNestedAggCall(Aggregate aggregate) { .getOrDefault(KEY_HAS_NESTED_AGG_CALL, "false") .equals("true")); } + + /** + * Mark a scan node as originating from a PPL search command. The scan node may be on top of the + * relBuilder stack directly, or wrapped in a Project (due to alias field wrapping). This hint is + * used to scope auto-highlight injection to search command queries only. + */ + public static void markSearchCommand(RelBuilder relBuilder) { + final RelHint hint = RelHint.builder(HINT_SEARCH_COMMAND).build(); + RelNode top = relBuilder.peek(); + if (top instanceof Hintable) { + // Scan is directly on top of the stack + relBuilder.hints(hint); + } else if (top instanceof org.apache.calcite.rel.core.Project proj) { + RelNode input = proj.getInput(); + if (input instanceof Hintable hintable) { + RelNode hintedInput = hintable.attachHints(List.of(hint)); + RelNode newProject = proj.copy(proj.getTraitSet(), List.of(hintedInput)); + relBuilder.build(); // pop old project + relBuilder.push(newProject); + } + } + if (relBuilder.getCluster().getHintStrategies() == HintStrategyTable.EMPTY) { + relBuilder.getCluster().setHintStrategies(HINT_STRATEGY_TABLE.get()); + } + } + + /** Return true if the scan has the SEARCH_COMMAND hint. */ + public static boolean isSearchCommand(TableScan scan) { + return scan.getHints().stream().anyMatch(hint -> hint.hintName.equals(HINT_SEARCH_COMMAND)); + } } diff --git a/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java b/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java index 79cc07f048b..6a6ad43ea49 100644 --- a/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java @@ -23,6 +23,9 @@ /** Highlight Expression. */ @Getter public class HighlightExpression extends FunctionExpression { + /** The field name used to store highlight data on ExprTupleValue rows. */ + public static final String HIGHLIGHT_FIELD = "_highlight"; + private final Expression highlightField; private final ExprType type; @@ -46,7 +49,7 @@ public HighlightExpression(Expression highlightField) { */ @Override public ExprValue valueOf(Environment valueEnv) { - String refName = "_highlight"; + String refName = HIGHLIGHT_FIELD; // Not a wilcard expression if (this.type == ExprCoreType.ARRAY) { refName += "." + StringUtils.unquoteText(getHighlightField().toString()); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index e3a47337162..7e215096af1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -52,6 +52,7 @@ import org.opensearch.sql.executor.ExecutionEngine.Schema.Column; import org.opensearch.sql.executor.Explain; import org.opensearch.sql.executor.pagination.PlanSerializer; +import org.opensearch.sql.expression.HighlightExpression; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; import org.opensearch.sql.monitor.profile.MetricName; @@ -62,6 +63,7 @@ import org.opensearch.sql.opensearch.executor.protector.ExecutionProtector; import org.opensearch.sql.opensearch.functions.DistinctCountApproxAggFunction; import org.opensearch.sql.opensearch.functions.GeoIpFunction; +import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexEnumerator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.storage.TableScanOperator; import org.opensearch.transport.client.node.NodeClient; @@ -210,6 +212,7 @@ public void execute( client.schedule( () -> { try (PreparedStatement statement = OpenSearchRelRunners.run(context, rel)) { + OpenSearchIndexEnumerator.clearCollectedHighlights(); ProfileMetric metric = QueryProfiling.current().getOrCreateMetric(MetricName.EXECUTE); long execTime = System.nanoTime(); ResultSet result = statement.executeQuery(); @@ -278,6 +281,21 @@ private QueryResponse buildResultSet( values.add(ExprTupleValue.fromExprValueMap(row)); } + // Merge highlight data collected by the enumerator back into ExprTupleValues. + // The Calcite row pipeline only carries schema column values, so highlight metadata + // is collected as a side channel in OpenSearchIndexEnumerator and merged here. + List collectedHighlights = + OpenSearchIndexEnumerator.getAndClearCollectedHighlights(); + for (int i = 0; i < Math.min(values.size(), collectedHighlights.size()); i++) { + ExprValue hl = collectedHighlights.get(i); + if (hl != null) { + Map rowWithHighlight = + new LinkedHashMap<>(ExprValueUtils.getTupleValue(values.get(i))); + rowWithHighlight.put(HighlightExpression.HIGHLIGHT_FIELD, hl); + values.set(i, ExprTupleValue.fromExprValueMap(rowWithHighlight)); + } + } + List columns = new ArrayList<>(metaData.getColumnCount()); for (int i = 1; i <= columnCount; ++i) { String columnName = metaData.getColumnName(i); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index 0a47dc64a5e..1422be8ab45 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -5,6 +5,7 @@ package org.opensearch.sql.opensearch.response; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATAFIELD_TYPE_MAP; import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_ID; import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_INDEX; @@ -200,7 +201,7 @@ private void addHighlightsToBuilder( .map(Text::toString) .collect(Collectors.toList()))); } - builder.put("_highlight", ExprTupleValue.fromExprValueMap(hlBuilder.build())); + builder.put(HIGHLIGHT_FIELD, ExprTupleValue.fromExprValueMap(hlBuilder.build())); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 3ab40caee27..e89e5e49d3e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -130,7 +130,7 @@ public double estimateRowCount(RelMetadataQuery mq) { (rowCount, operation) -> switch (operation.type()) { case AGGREGATION -> mq.getRowCount((RelNode) operation.digest()); - case PROJECT, SORT, SORT_EXPR -> rowCount; + case PROJECT, SORT, SORT_EXPR, HIGHLIGHT -> rowCount; case SORT_AGG_METRICS -> NumberUtil.min(rowCount, osIndex.getQueryBucketSize().doubleValue()); // Refer the org.apache.calcite.rel.metadata.RelMdRowCount @@ -176,8 +176,8 @@ public double estimateRowCount(RelMetadataQuery mq) { dRows = mq.getRowCount((RelNode) operation.digest()); dCpu += dRows * getAggMultiplier(operation); } - // Ignored Project in cost accumulation, but it will affect the external cost - case PROJECT -> {} + // Ignored Project and Highlight in cost accumulation + case PROJECT, HIGHLIGHT -> {} case SORT -> dCpu += dRows; case SORT_AGG_METRICS -> { dRows = dRows * .9 / 10; // *.9 because always bucket IS_NOT_NULL @@ -266,6 +266,11 @@ public Map getAliasMapping() { return osIndex.getAliasMapping(); } + @Override + public RelNode withHints(List hintList) { + return buildScan(getCluster(), traitSet, hintList, table, osIndex, schema, pushDownContext); + } + public abstract AbstractCalciteIndexScan copy(); protected List getCollationNames(List collations) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index dbe8306d4b2..5a2ec10fbf1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -40,6 +40,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.common.setting.Settings; @@ -158,6 +159,27 @@ public AbstractRelNode pushDownFilter(Filter filter) { (OSRequestBuilderAction) requestBuilder -> requestBuilder.pushDownFilterForCalcite(queryExpression.builder())); + // Auto-inject wildcard highlight for PPL search command result highlighting. + // Only adds highlight when the scan is marked with a SEARCH_COMMAND hint + // (set by CalciteRelNodeVisitor.visitSearch), scoping it to the search command only. + // Uses OSD custom tags so the frontend getHighlightHtml() can convert to . + if (PPLHintUtils.isSearchCommand(this)) { + newScan.pushDownContext.add( + PushDownType.HIGHLIGHT, + "auto_highlight", + (OSRequestBuilderAction) + requestBuilder -> { + if (requestBuilder.getSourceBuilder().highlighter() == null) { + HighlightBuilder highlightBuilder = + new HighlightBuilder() + .field(new HighlightBuilder.Field("*").numOfFragments(0)) + .preTags("@opensearch-dashboards-highlighted-field@") + .postTags("@/opensearch-dashboards-highlighted-field@"); + requestBuilder.getSourceBuilder().highlighter(highlightBuilder); + } + }); + } + // If the query expression is partial, we need to replace the input of the filter with the // partial pushed scan and the filter condition with non-pushed-down conditions. if (queryExpression.isPartial()) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java index 05bd00dcf2c..e4d86ce0fad 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java @@ -5,9 +5,13 @@ package org.opensearch.sql.opensearch.storage.scan; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; + +import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Map; import lombok.EqualsAndHashCode; import lombok.ToString; import org.apache.calcite.linq4j.Enumerator; @@ -27,6 +31,27 @@ */ public class OpenSearchIndexEnumerator implements Enumerator { + /** + * Thread-local collector for highlight data. Since the Calcite row pipeline only carries schema + * column values, highlight metadata from OpenSearch hits is collected here as a side channel. + * After execution, {@link #getAndClearCollectedHighlights()} retrieves the collected data so it + * can be merged back into the ExprTupleValues for the JDBC response. + */ + private static final ThreadLocal> COLLECTED_HIGHLIGHTS = + ThreadLocal.withInitial(ArrayList::new); + + /** Retrieve collected highlights and clear the ThreadLocal. */ + public static List getAndClearCollectedHighlights() { + List result = new ArrayList<>(COLLECTED_HIGHLIGHTS.get()); + COLLECTED_HIGHLIGHTS.get().clear(); + return result; + } + + /** Clear collected highlights (call before starting a new execution). */ + public static void clearCollectedHighlights() { + COLLECTED_HIGHLIGHTS.get().clear(); + } + /** OpenSearch client. */ private final OpenSearchClient client; @@ -111,6 +136,12 @@ public boolean moveNext() { } if (iterator.hasNext()) { current = iterator.next(); + // Collect highlight data as a side channel for the JDBC response. + // The Calcite row (from current()) only carries schema column values, + // so _highlight must be preserved separately. + Map tuple = ExprValueUtils.getTupleValue(current); + ExprValue hl = tuple.get(HIGHLIGHT_FIELD); + COLLECTED_HIGHLIGHTS.get().add(hl != null && !hl.isMissing() ? hl : null); queryCount++; return true; } else { @@ -123,6 +154,7 @@ public void reset() { bgScanner.reset(request); iterator = bgScanner.fetchNextBatch(request).iterator(); queryCount = 0; + COLLECTED_HIGHLIGHTS.get().clear(); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java index c763808164d..d6817b8eeab 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java @@ -15,7 +15,7 @@ public enum PushDownType { SCRIPT, // script in predicate SORT_AGG_METRICS, // convert composite aggregate to terms or multi-terms bucket aggregate RARE_TOP, // convert composite aggregate to nested aggregate - SORT_EXPR - // HIGHLIGHT, + SORT_EXPR, + HIGHLIGHT // NESTED } diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java index 53badf3950d..9c8119a49dd 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java @@ -5,11 +5,15 @@ package org.opensearch.sql.protocol.response; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; + import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.stream.Collectors; import lombok.Getter; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; @@ -82,19 +86,48 @@ public Map columnNameTypes() { @Override public Iterator iterator() { - // Any chance to avoid copy for json response generation? return exprValues.stream() .map(ExprValueUtils::getTupleValue) - .map(Map::values) - .map(this::convertExprValuesToValues) + .map( + tuple -> + tuple.entrySet().stream() + .filter(e -> !HIGHLIGHT_FIELD.equals(e.getKey())) + .map(e -> e.getValue().value()) + .toArray(Object[]::new)) .iterator(); } - private String getColumnName(Column column) { - return (column.getAlias() != null) ? column.getAlias() : column.getName(); + /** + * Extract highlight data from each result row. Each row may contain a {@code _highlight} field + * added by {@code OpenSearchResponse.addHighlightsToBuilder()} and preserved through projection. + * Returns a list parallel to datarows where each entry is either a map of field name to highlight + * fragments, or null if no highlight data exists for that row. + * + * @return list of highlight maps, one per row + */ + public List> highlights() { + return exprValues.stream() + .map(ExprValueUtils::getTupleValue) + .map( + tuple -> { + ExprValue hl = tuple.get(HIGHLIGHT_FIELD); + if (hl == null || hl.isMissing()) { + return null; + } + Map hlMap = new LinkedHashMap<>(); + for (Map.Entry entry : hl.tupleValue().entrySet()) { + hlMap.put( + entry.getKey(), + entry.getValue().collectionValue().stream() + .map(ExprValue::stringValue) + .collect(Collectors.toList())); + } + return (Map) hlMap; + }) + .collect(Collectors.toList()); } - private Object[] convertExprValuesToValues(Collection exprValues) { - return exprValues.stream().map(ExprValue::value).toArray(Object[]::new); + private String getColumnName(Column column) { + return (column.getAlias() != null) ? column.getAlias() : column.getName(); } } diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatter.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatter.java index 8be22af5326..5c696035555 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatter.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatter.java @@ -6,6 +6,8 @@ package org.opensearch.sql.protocol.response.format; import java.util.List; +import java.util.Map; +import java.util.Objects; import lombok.Builder; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -38,6 +40,12 @@ protected Object buildJsonObject(QueryResult response) { response.getSchema().getColumns().forEach(col -> json.column(fetchColumn(col))); json.datarows(fetchDataRows(response)); + // Populate highlights if present + List> highlights = response.highlights(); + if (highlights.stream().anyMatch(Objects::nonNull)) { + json.highlights(highlights); + } + // Populate other fields json.total(response.size()).size(response.size()).status(200); if (!response.getCursor().equals(Cursor.None)) { @@ -88,6 +96,7 @@ public static class JdbcResponse { private final List schema; private final Object[][] datarows; + private final List> highlights; private final long total; private final long size; private final int status; diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java index ff59ce4cddc..5e57d8aacd6 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java @@ -6,6 +6,8 @@ package org.opensearch.sql.protocol.response.format; import java.util.List; +import java.util.Map; +import java.util.Objects; import lombok.Builder; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -54,6 +56,13 @@ public Object buildJsonObject(QueryResult response) { response.columnNameTypes().forEach((name, type) -> json.column(new Column(name, type))); json.datarows(fetchDataRows(response)); + + // Populate highlights if present + List> highlights = response.highlights(); + if (highlights.stream().anyMatch(Objects::nonNull)) { + json.highlights(highlights); + } + formatMetric.set(System.nanoTime() - formatTime); json.profile(QueryProfiling.current().finish()); @@ -79,6 +88,7 @@ public static class JsonResponse { private final List schema; private final Object[][] datarows; + private final List> highlights; private long total; private long size; diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java index fc3402e20a5..f028da0f692 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.fail; import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; @@ -16,7 +17,11 @@ import com.google.common.collect.ImmutableMap; import java.util.Arrays; import java.util.Collections; +import java.util.List; +import java.util.Map; import org.junit.jupiter.api.Test; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.pagination.Cursor; @@ -106,4 +111,68 @@ void iterate() { i++; } } + + @Test + void iterate_excludes_highlight_from_datarows() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("John"), + "age", + ExprValueUtils.integerValue(20), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.collectionValue( + ImmutableList.of("John"))))))), + Cursor.None); + + for (Object[] objects : response) { + // datarows should only have schema columns, not _highlight + assertArrayEquals(new Object[] {"John", 20}, objects); + } + } + + @Test + void highlights_returns_highlight_data() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("John"), + "age", + ExprValueUtils.integerValue(20), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.collectionValue( + ImmutableList.of("John"))))))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertEquals(ImmutableMap.of("name", ImmutableList.of("John")), highlights.get(0)); + } + + @Test + void highlights_returns_null_when_no_highlight_data() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList(tupleValue(ImmutableMap.of("name", "John", "age", 20))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertNull(highlights.get(0)); + } } diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatterTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatterTest.java index 16dd1590eea..b8347f32be7 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatterTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/JdbcResponseFormatterTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableMap; import com.google.gson.JsonParser; import java.util.Arrays; +import java.util.Collections; import java.util.Map; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; @@ -207,6 +208,55 @@ void format_server_error_response_due_to_opensearch() { "all shards failed", new IllegalStateException("Execution error")))); } + @Test + void format_response_with_highlights() { + QueryResult response = + new QueryResult( + new Schema( + ImmutableList.of( + new Column("name", null, STRING), new Column("age", null, INTEGER))), + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + stringValue("John"), + "age", + org.opensearch.sql.data.model.ExprValueUtils.integerValue(20), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + org.opensearch.sql.data.model.ExprValueUtils.collectionValue( + ImmutableList.of("John")))))))); + + assertJsonEquals( + "{" + + "\"schema\":[" + + "{\"name\":\"name\",\"type\":\"keyword\"}," + + "{\"name\":\"age\",\"type\":\"integer\"}" + + "]," + + "\"datarows\":[[\"John\",20]]," + + "\"highlights\":[{\"name\":[\"John\"]}]," + + "\"total\":1," + + "\"size\":1," + + "\"status\":200}", + formatter.format(response)); + } + + @Test + void format_response_without_highlights() { + QueryResult response = + new QueryResult( + new Schema( + ImmutableList.of( + new Column("name", null, STRING), new Column("age", null, INTEGER))), + Collections.singletonList(tupleValue(ImmutableMap.of("name", "John", "age", 20)))); + + // When no highlights, the highlights field should not appear in the JSON + String formatted = formatter.format(response); + assertEquals(false, formatted.contains("\"highlights\"")); + } + private static void assertJsonEquals(String expected, String actual) { assertEquals(JsonParser.parseString(expected), JsonParser.parseString(actual)); } From 3b59ea113e722f1613c52e3e049cfb72c54429f4 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Wed, 18 Feb 2026 14:56:27 -0800 Subject: [PATCH 02/18] [review-ready] highligting with ppl api pass-through design Signed-off-by: Jialiang Liang --- .../sql/calcite/CalcitePlanContext.java | 20 ++++++++ .../sql/calcite/CalciteRelNodeVisitor.java | 4 -- .../sql/calcite/utils/PPLHintUtils.java | 37 -------------- .../sql/executor/execution/AbstractPlan.java | 10 ++++ .../sql/executor/execution/ExplainPlan.java | 16 +++++- .../sql/executor/execution/QueryPlan.java | 22 +++++++-- .../scan/AbstractCalciteIndexScan.java | 49 ++++++++++++++++++- .../scan/CalciteEnumerableIndexScan.java | 1 + .../storage/scan/CalciteLogicalIndexScan.java | 22 --------- .../org/opensearch/sql/ppl/PPLService.java | 20 +++++++- .../sql/ppl/domain/PPLQueryRequest.java | 14 ++++++ 11 files changed, 143 insertions(+), 72 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index 6ad935e59da..adf02cee275 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -45,6 +45,26 @@ public class CalcitePlanContext { private static final ThreadLocal legacyPreferredFlag = ThreadLocal.withInitial(() -> true); + /** + * Thread-local highlight configuration from the PPL request body. Set by PPLService before query + * execution and read by CalciteEnumerableIndexScan when building the OpenSearch request. The map + * represents the highlight JSON object (fields, pre_tags, post_tags, fragment_size) that the + * caller provides and the backend forwards as-is to OpenSearch. + */ + private static final ThreadLocal> highlightConfig = new ThreadLocal<>(); + + public static void setHighlightConfig(Map config) { + highlightConfig.set(config); + } + + public static Map getHighlightConfig() { + return highlightConfig.get(); + } + + public static void clearHighlightConfig() { + highlightConfig.remove(); + } + @Getter @Setter private boolean isResolvingJoinCondition = false; @Getter @Setter private boolean isResolvingSubquery = false; @Getter @Setter private boolean inCoalesceFunction = false; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 9a1c0f2a880..c6e0c5f2f25 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -228,10 +228,6 @@ public RelNode visitSearch(Search node, CalcitePlanContext context) { // Visit the Relation child to get the scan node.getChild().get(0).accept(this, context); - // Mark the scan as originating from a search command so that the optimizer - // can scope auto-highlight injection to search queries only. - PPLHintUtils.markSearchCommand(context.relBuilder); - // Create query_string function Function queryStringFunc = AstDSL.function( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java index 31d213b32d8..7b87778797f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java @@ -6,14 +6,10 @@ package org.opensearch.sql.calcite.utils; import com.google.common.base.Suppliers; -import java.util.List; import java.util.function.Supplier; import lombok.experimental.UtilityClass; -import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; -import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.hint.HintStrategyTable; -import org.apache.calcite.rel.hint.Hintable; import org.apache.calcite.rel.hint.RelHint; import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.tools.RelBuilder; @@ -23,8 +19,6 @@ public class PPLHintUtils { private static final String HINT_AGG_ARGUMENTS = "AGG_ARGS"; private static final String KEY_IGNORE_NULL_BUCKET = "ignoreNullBucket"; private static final String KEY_HAS_NESTED_AGG_CALL = "hasNestedAggCall"; - public static final String HINT_SEARCH_COMMAND = "SEARCH_COMMAND"; - private static final Supplier HINT_STRATEGY_TABLE = Suppliers.memoize( () -> @@ -34,7 +28,6 @@ public class PPLHintUtils { (hint, rel) -> { return rel instanceof LogicalAggregate; }) - .hintStrategy(HINT_SEARCH_COMMAND, (hint, rel) -> rel instanceof TableScan) .build()); /** @@ -86,34 +79,4 @@ public static boolean hasNestedAggCall(Aggregate aggregate) { .getOrDefault(KEY_HAS_NESTED_AGG_CALL, "false") .equals("true")); } - - /** - * Mark a scan node as originating from a PPL search command. The scan node may be on top of the - * relBuilder stack directly, or wrapped in a Project (due to alias field wrapping). This hint is - * used to scope auto-highlight injection to search command queries only. - */ - public static void markSearchCommand(RelBuilder relBuilder) { - final RelHint hint = RelHint.builder(HINT_SEARCH_COMMAND).build(); - RelNode top = relBuilder.peek(); - if (top instanceof Hintable) { - // Scan is directly on top of the stack - relBuilder.hints(hint); - } else if (top instanceof org.apache.calcite.rel.core.Project proj) { - RelNode input = proj.getInput(); - if (input instanceof Hintable hintable) { - RelNode hintedInput = hintable.attachHints(List.of(hint)); - RelNode newProject = proj.copy(proj.getTraitSet(), List.of(hintedInput)); - relBuilder.build(); // pop old project - relBuilder.push(newProject); - } - } - if (relBuilder.getCluster().getHintStrategies() == HintStrategyTable.EMPTY) { - relBuilder.getCluster().setHintStrategies(HINT_STRATEGY_TABLE.get()); - } - } - - /** Return true if the scan has the SEARCH_COMMAND hint. */ - public static boolean isSearchCommand(TableScan scan) { - return scan.getHints().stream().anyMatch(hint -> hint.hintName.equals(HINT_SEARCH_COMMAND)); - } } diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java b/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java index e470d12507e..f9e3c99dc88 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java @@ -5,8 +5,10 @@ package org.opensearch.sql.executor.execution; +import java.util.Map; import lombok.Getter; import lombok.RequiredArgsConstructor; +import lombok.Setter; import org.opensearch.sql.ast.statement.ExplainMode; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.ExecutionEngine; @@ -22,6 +24,14 @@ public abstract class AbstractPlan { @Getter protected final QueryType queryType; + /** + * Highlight configuration from the PPL request body. Set by PPLService before submitting the plan + * to the query manager. The plan carries this config across the thread boundary (REST handler + * thread → sql-worker thread), and the worker thread sets it as a ThreadLocal before Calcite + * planning and execution begin. + */ + @Getter @Setter private Map highlightConfig; + /** Start query execution. */ public abstract void execute(); diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java b/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java index 27f7a47e504..aeab7181ee9 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java @@ -5,7 +5,9 @@ package org.opensearch.sql.executor.execution; +import java.util.Map; import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.QueryId; @@ -34,7 +36,19 @@ public ExplainPlan( @Override public void execute() { - plan.explain(explainListener, mode); + setHighlightThreadLocal(); + try { + plan.explain(explainListener, mode); + } finally { + CalcitePlanContext.clearHighlightConfig(); + } + } + + private void setHighlightThreadLocal() { + Map config = getHighlightConfig(); + if (config != null) { + CalcitePlanContext.setHighlightConfig(config); + } } @Override diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java b/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java index 5896871f5d0..e5f4e1eb868 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java @@ -5,11 +5,13 @@ package org.opensearch.sql.executor.execution; +import java.util.Map; import java.util.Optional; import org.apache.commons.lang3.NotImplementedException; import org.opensearch.sql.ast.statement.ExplainMode; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.QueryId; @@ -60,10 +62,15 @@ public QueryPlan( @Override public void execute() { - if (pageSize.isPresent()) { - queryService.execute(new Paginate(pageSize.get(), plan), getQueryType(), listener); - } else { - queryService.execute(plan, getQueryType(), listener); + setHighlightThreadLocal(); + try { + if (pageSize.isPresent()) { + queryService.execute(new Paginate(pageSize.get(), plan), getQueryType(), listener); + } else { + queryService.execute(plan, getQueryType(), listener); + } + } finally { + CalcitePlanContext.clearHighlightConfig(); } } @@ -78,4 +85,11 @@ public void explain( queryService.explain(plan, getQueryType(), listener, mode); } } + + private void setHighlightThreadLocal() { + Map config = getHighlightConfig(); + if (config != null) { + CalcitePlanContext.setHighlightConfig(config); + } + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index e89e5e49d3e..0a56cb85a20 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -45,15 +45,18 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import org.opensearch.search.sort.ScoreSortBuilder; import org.opensearch.search.sort.ScriptSortBuilder.ScriptSortType; import org.opensearch.search.sort.SortBuilder; import org.opensearch.search.sort.SortBuilders; import org.opensearch.search.sort.SortOrder; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.plan.AliasFieldsWrappable; import org.opensearch.sql.common.setting.Settings.Key; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.opensearch.request.PredicateAnalyzer; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; import org.opensearch.sql.opensearch.storage.scan.context.AbstractAction; @@ -106,13 +109,55 @@ public RelDataType deriveRowType() { public RelWriter explainTerms(RelWriter pw) { String explainString = String.valueOf(pushDownContext); if (pw instanceof RelWriterImpl) { - // Only add request builder to the explain plan - explainString += ", " + pushDownContext.createRequestBuilder(); + OpenSearchRequestBuilder requestBuilder = pushDownContext.createRequestBuilder(); + applyHighlightConfig(requestBuilder); + explainString += ", " + requestBuilder; } return super.explainTerms(pw) .itemIf("PushDownContext", explainString, !pushDownContext.isEmpty()); } + /** + * Apply highlight configuration from the ThreadLocal to the OpenSearch request builder. The + * highlight config is set on a ThreadLocal by the plan's execute() method (on the worker thread) + * and forwarded as-is to OpenSearch. + * + * @param requestBuilder the OpenSearch request builder to attach the highlight clause to + */ + @SuppressWarnings("unchecked") + protected static void applyHighlightConfig(OpenSearchRequestBuilder requestBuilder) { + Map config = CalcitePlanContext.getHighlightConfig(); + if (config == null) { + return; + } + HighlightBuilder highlightBuilder = new HighlightBuilder(); + Object fieldsObj = config.get("fields"); + if (fieldsObj instanceof Map) { + Map fields = (Map) fieldsObj; + for (String fieldName : fields.keySet()) { + highlightBuilder.field(new HighlightBuilder.Field(fieldName)); + } + } + Object preTagsObj = config.get("pre_tags"); + if (preTagsObj instanceof List) { + List preTags = (List) preTagsObj; + highlightBuilder.preTags(preTags.toArray(new String[0])); + } + Object postTagsObj = config.get("post_tags"); + if (postTagsObj instanceof List) { + List postTags = (List) postTagsObj; + highlightBuilder.postTags(postTags.toArray(new String[0])); + } + Object fragmentSizeObj = config.get("fragment_size"); + if (fragmentSizeObj instanceof Number) { + int fragmentSize = ((Number) fragmentSizeObj).intValue(); + for (HighlightBuilder.Field field : highlightBuilder.fields()) { + field.fragmentSize(fragmentSize); + } + } + requestBuilder.getSourceBuilder().highlighter(highlightBuilder); + } + protected Integer getQuerySizeLimit() { return osIndex.getSettings().getSettingValue(Key.QUERY_SIZE_LIMIT); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index 7ba75e46ba0..0c1090cda8f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -119,6 +119,7 @@ public Result implement(EnumerableRelImplementor implementor, Prefer pref) { @Override public Enumerator enumerator() { OpenSearchRequestBuilder requestBuilder = pushDownContext.createRequestBuilder(); + applyHighlightConfig(requestBuilder); return new OpenSearchIndexEnumerator( osIndex.getClient(), getRowType().getFieldNames(), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 5a2ec10fbf1..dbe8306d4b2 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -40,7 +40,6 @@ import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; -import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.common.setting.Settings; @@ -159,27 +158,6 @@ public AbstractRelNode pushDownFilter(Filter filter) { (OSRequestBuilderAction) requestBuilder -> requestBuilder.pushDownFilterForCalcite(queryExpression.builder())); - // Auto-inject wildcard highlight for PPL search command result highlighting. - // Only adds highlight when the scan is marked with a SEARCH_COMMAND hint - // (set by CalciteRelNodeVisitor.visitSearch), scoping it to the search command only. - // Uses OSD custom tags so the frontend getHighlightHtml() can convert to . - if (PPLHintUtils.isSearchCommand(this)) { - newScan.pushDownContext.add( - PushDownType.HIGHLIGHT, - "auto_highlight", - (OSRequestBuilderAction) - requestBuilder -> { - if (requestBuilder.getSourceBuilder().highlighter() == null) { - HighlightBuilder highlightBuilder = - new HighlightBuilder() - .field(new HighlightBuilder.Field("*").numOfFragments(0)) - .preTags("@opensearch-dashboards-highlighted-field@") - .postTags("@/opensearch-dashboards-highlighted-field@"); - requestBuilder.getSourceBuilder().highlighter(highlightBuilder); - } - }); - } - // If the query expression is partial, we need to replace the input of the filter with the // partial pushed scan and the filter condition with non-pushed-down conditions. if (queryExpression.isPartial()) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java index d6f025a4540..750137e6f07 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java @@ -10,6 +10,7 @@ import lombok.extern.log4j.Log4j2; import org.antlr.v4.runtime.tree.ParseTree; +import org.json.JSONObject; import org.opensearch.sql.ast.statement.Statement; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.common.setting.Settings; @@ -64,7 +65,9 @@ public void execute( ResponseListener queryListener, ResponseListener explainListener) { try { - queryManager.submit(plan(request, queryListener, explainListener)); + AbstractPlan plan = plan(request, queryListener, explainListener); + setHighlightOnPlan(plan, request); + queryManager.submit(plan); } catch (Exception e) { queryListener.onFailure(e); } @@ -79,12 +82,25 @@ public void execute( */ public void explain(PPLQueryRequest request, ResponseListener listener) { try { - queryManager.submit(plan(request, NO_CONSUMER_RESPONSE_LISTENER, listener)); + AbstractPlan plan = plan(request, NO_CONSUMER_RESPONSE_LISTENER, listener); + setHighlightOnPlan(plan, request); + queryManager.submit(plan); } catch (Exception e) { listener.onFailure(e); } } + /** + * Set highlight configuration on the plan so it can be carried across the thread boundary. The + * plan's execute() method will set the ThreadLocal on the worker thread. + */ + private void setHighlightOnPlan(AbstractPlan plan, PPLQueryRequest request) { + JSONObject highlight = request.getHighlight(); + if (highlight != null) { + plan.setHighlightConfig(highlight.toMap()); + } + } + private AbstractPlan plan( PPLQueryRequest request, ResponseListener queryListener, diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java index caf666b3b4e..d144b5f5d0c 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java @@ -109,4 +109,18 @@ public int getFetchSize() { } return jsonContent.optInt(FETCH_SIZE_FIELD, 0); } + + /** + * Get the highlight configuration from the request body. The caller (OSD, API, CLI) controls + * highlighting by providing a highlight object in the PPL request. The backend forwards this + * config as-is to OpenSearch. + * + * @return highlight JSONObject from request, or null if not specified + */ + public JSONObject getHighlight() { + if (jsonContent == null) { + return null; + } + return jsonContent.optJSONObject("highlight"); + } } From 5c1557b04ec4e35836fedc3a6faaf4f14b0f2399 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Wed, 18 Feb 2026 15:53:41 -0800 Subject: [PATCH 03/18] Add tests Signed-off-by: Jialiang Liang --- .../executor/execution/ExplainPlanTest.java | 37 ++++ .../sql/executor/execution/QueryPlanTest.java | 37 ++++ .../calcite/remote/CalcitePPLHighlightIT.java | 184 ++++++++++++++++++ .../rest-api-spec/test/ppl_highlight.yml | 100 ++++++++++ ...AbstractCalciteIndexScanHighlightTest.java | 101 ++++++++++ .../sql/ppl/domain/PPLQueryRequestTest.java | 31 +++ .../SimpleJsonResponseFormatterTest.java | 39 ++++ 7 files changed, 529 insertions(+) create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml create mode 100644 opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanHighlightTest.java diff --git a/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java b/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java index 4cb5c755d14..e170fe2a522 100644 --- a/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java @@ -6,17 +6,22 @@ package org.opensearch.sql.executor.execution; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.QueryId; @@ -56,4 +61,36 @@ public void explainThrowException() { }); assertEquals("explain query can not been explained.", unsupportedExplainException.getMessage()); } + + @Test + public void execute_sets_highlight_threadlocal_for_explain() { + Map highlightConfig = Map.of("fields", Map.of("*", Map.of())); + AtomicReference> captured = new AtomicReference<>(); + + doAnswer( + invocation -> { + captured.set(CalcitePlanContext.getHighlightConfig()); + return null; + }) + .when(queryPlan) + .explain(any(), any()); + + ExplainPlan explainPlan = new ExplainPlan(queryId, queryType, queryPlan, mode, explainListener); + explainPlan.setHighlightConfig(highlightConfig); + explainPlan.execute(); + + assertEquals(highlightConfig, captured.get()); + } + + @Test + public void execute_clears_highlight_threadlocal_after_explain() { + Map highlightConfig = Map.of("fields", Map.of("*", Map.of())); + doNothing().when(queryPlan).explain(any(), any()); + + ExplainPlan explainPlan = new ExplainPlan(queryId, queryType, queryPlan, mode, explainListener); + explainPlan.setHighlightConfig(highlightConfig); + explainPlan.execute(); + + assertNull(CalcitePlanContext.getHighlightConfig()); + } } diff --git a/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java b/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java index 77deb9b6a48..980a6ef9cce 100644 --- a/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java @@ -5,14 +5,19 @@ package org.opensearch.sql.executor.execution; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.lang3.NotImplementedException; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; @@ -22,6 +27,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.ast.statement.ExplainMode; import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.DefaultExecutionEngine; import org.opensearch.sql.executor.ExecutionEngine; @@ -126,4 +132,35 @@ public void onFailure(Exception e) { }, mode); } + + @Test + public void execute_sets_highlight_threadlocal() { + Map highlightConfig = Map.of("fields", Map.of("*", Map.of())); + AtomicReference> captured = new AtomicReference<>(); + + doAnswer( + invocation -> { + captured.set(CalcitePlanContext.getHighlightConfig()); + return null; + }) + .when(queryService) + .execute(any(), any(), any()); + + QueryPlan query = new QueryPlan(queryId, queryType, plan, queryService, queryListener); + query.setHighlightConfig(highlightConfig); + query.execute(); + + assertEquals(highlightConfig, captured.get()); + } + + @Test + public void execute_clears_highlight_threadlocal_after_execution() { + Map highlightConfig = Map.of("fields", Map.of("*", Map.of())); + + QueryPlan query = new QueryPlan(queryId, queryType, plan, queryService, queryListener); + query.setHighlightConfig(highlightConfig); + query.execute(); + + assertNull(CalcitePlanContext.getHighlightConfig()); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java new file mode 100644 index 00000000000..80eb2621939 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java @@ -0,0 +1,184 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Locale; +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalcitePPLHighlightIT extends PPLIntegTestCase { + + private static final String TEST_INDEX = "highlight_test"; + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + + // Create index with text fields + Request createIndex = new Request("PUT", "/" + TEST_INDEX); + createIndex.setJsonEntity( + "{" + + "\"settings\": {\"number_of_shards\": 1, \"number_of_replicas\": 0}," + + "\"mappings\": {\"properties\": {" + + "\"message\": {\"type\": \"text\"}," + + "\"status\": {\"type\": \"text\"}," + + "\"code\": {\"type\": \"integer\"}" + + "}}" + + "}"); + client().performRequest(createIndex); + + // Index test documents + Request bulk = new Request("POST", "/" + TEST_INDEX + "/_bulk?refresh=true"); + bulk.setJsonEntity( + "{\"index\": {}}\n" + + "{\"message\": \"Connection error occurred\", \"status\": \"error response\"," + + " \"code\": 500}\n" + + "{\"index\": {}}\n" + + "{\"message\": \"Request completed successfully\", \"status\": \"ok\", \"code\":" + + " 200}\n" + + "{\"index\": {}}\n" + + "{\"message\": \"Timeout error in service\", \"status\": \"error timeout\", \"code\":" + + " 504}\n"); + client().performRequest(bulk); + } + + @Test + public void testHighlightWithWildcardFields() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "search source=" + TEST_INDEX + " \"error\"", + "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}"); + + assertTrue(result.has("highlights")); + JSONArray highlights = result.getJSONArray("highlights"); + assertEquals(result.getInt("size"), highlights.length()); + + // At least one highlight entry should contain "error" wrapped in tags + boolean foundHighlight = false; + for (int i = 0; i < highlights.length(); i++) { + if (!highlights.isNull(i)) { + String hlStr = highlights.get(i).toString(); + if (hlStr.contains("error") || hlStr.contains("Error")) { + foundHighlight = true; + break; + } + } + } + assertTrue("Expected at least one highlight with tags", foundHighlight); + } + + @Test + public void testHighlightWithSpecificField() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "search source=" + TEST_INDEX + " \"error\"", + "{\"fields\": {\"message\": {}}, \"pre_tags\": [\"\"], \"post_tags\":" + + " [\"\"]}"); + + assertTrue(result.has("highlights")); + JSONArray highlights = result.getJSONArray("highlights"); + + // Check that highlights only contain "message" field, not "status" + for (int i = 0; i < highlights.length(); i++) { + if (!highlights.isNull(i)) { + JSONObject hl = highlights.getJSONObject(i); + assertFalse("Should not highlight status field", hl.has("status")); + } + } + } + + @Test + public void testHighlightWithCustomTags() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "search source=" + TEST_INDEX + " \"error\"", + "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}"); + + assertTrue(result.has("highlights")); + JSONArray highlights = result.getJSONArray("highlights"); + + boolean foundCustomTag = false; + for (int i = 0; i < highlights.length(); i++) { + if (!highlights.isNull(i)) { + String hlStr = highlights.get(i).toString(); + if (hlStr.contains("")) { + foundCustomTag = true; + break; + } + } + } + assertTrue("Expected custom tags in highlights", foundCustomTag); + } + + @Test + public void testNoHighlightWhenNotRequested() throws IOException { + JSONObject result = executeQuery("search source=" + TEST_INDEX + " \"error\""); + + assertFalse("Should not have highlights when not requested", result.has("highlights")); + } + + @Test + public void testHighlightWithPipedFilter() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "search source=" + TEST_INDEX + " \"error\" | where code > 500", + "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}"); + + assertTrue(result.has("highlights")); + // Only the doc with code=504 should match + assertEquals(1, result.getInt("size")); + JSONArray highlights = result.getJSONArray("highlights"); + assertEquals(1, highlights.length()); + assertNotNull(highlights.get(0)); + } + + @Test + public void testExplainWithHighlight() throws IOException { + Request request = new Request("POST", "/_plugins/_ppl/_explain"); + request.setJsonEntity( + String.format( + Locale.ROOT, + "{\"query\": \"search source=%s \\\"error\\\"\"," + + "\"highlight\": {\"fields\": {\"*\": {}}," + + "\"pre_tags\": [\"\"], \"post_tags\": [\"\"]}}", + TEST_INDEX)); + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + Response response = client().performRequest(request); + assertEquals(200, response.getStatusLine().getStatusCode()); + + String body = org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true); + // The explain output should contain the highlight clause + assertTrue("Explain should contain highlight", body.contains("highlight")); + } + + private JSONObject executeQueryWithHighlight(String query, String highlightJson) + throws IOException { + Request request = new Request("POST", "/_plugins/_ppl"); + request.setJsonEntity( + String.format(Locale.ROOT, "{\"query\": \"%s\", \"highlight\": %s}", query, highlightJson)); + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + Response response = client().performRequest(request); + assertEquals(200, response.getStatusLine().getStatusCode()); + String body = org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true); + return new JSONObject(body); + } +} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml new file mode 100644 index 00000000000..e6584c971e0 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml @@ -0,0 +1,100 @@ +setup: + - do: + indices.create: + index: ppl_highlight_test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + message: + type: text + status: + type: text + code: + type: integer + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + + - do: + bulk: + index: ppl_highlight_test + refresh: true + body: + - '{"index": {}}' + - '{"message": "Connection error occurred", "status": "error response", "code": 500}' + - '{"index": {}}' + - '{"message": "Request completed successfully", "status": "ok", "code": 200}' + - '{"index": {}}' + - '{"message": "Timeout error in service", "status": "error timeout", "code": 504}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"PPL search with wildcard highlight": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'search source=ppl_highlight_test "error"' + highlight: + fields: + "*": {} + pre_tags: + - "" + post_tags: + - "" + + - match: {"size": 2} + - is_true: highlights + +--- +"PPL search without highlight is backward compatible": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'search source=ppl_highlight_test "error"' + + - match: {"size": 2} + - is_false: highlights + +--- +"PPL search with custom tags": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'search source=ppl_highlight_test "error"' + highlight: + fields: + "*": {} + pre_tags: + - "" + post_tags: + - "" + + - match: {"size": 2} + - is_true: highlights diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanHighlightTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanHighlightTest.java new file mode 100644 index 00000000000..61b2a292e5c --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanHighlightTest.java @@ -0,0 +1,101 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.when; + +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; + +@ExtendWith(MockitoExtension.class) +class AbstractCalciteIndexScanHighlightTest { + + @Mock private OpenSearchRequestBuilder requestBuilder; + + @AfterEach + void cleanup() { + CalcitePlanContext.clearHighlightConfig(); + } + + @Test + void applyHighlightConfig_withNullConfig_doesNothing() { + // No ThreadLocal set — config is null, requestBuilder should not be called + AbstractCalciteIndexScan.applyHighlightConfig(requestBuilder); + // If config is null, method returns early — no interaction with requestBuilder + } + + @Test + void applyHighlightConfig_withWildcardFields_setsHighlighter() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); + + CalcitePlanContext.setHighlightConfig(Map.of("fields", Map.of("*", Map.of()))); + AbstractCalciteIndexScan.applyHighlightConfig(requestBuilder); + + HighlightBuilder highlighter = sourceBuilder.highlighter(); + assertNotNull(highlighter); + assertEquals(1, highlighter.fields().size()); + assertEquals("*", highlighter.fields().get(0).name()); + } + + @Test + void applyHighlightConfig_withCustomTags_setsTags() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); + + CalcitePlanContext.setHighlightConfig( + Map.of( + "fields", Map.of("message", Map.of()), + "pre_tags", List.of(""), + "post_tags", List.of(""))); + AbstractCalciteIndexScan.applyHighlightConfig(requestBuilder); + + HighlightBuilder highlighter = sourceBuilder.highlighter(); + assertArrayEquals(new String[] {""}, highlighter.preTags()); + assertArrayEquals(new String[] {""}, highlighter.postTags()); + } + + @Test + void applyHighlightConfig_withFragmentSize_setsPerField() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); + + CalcitePlanContext.setHighlightConfig( + Map.of("fields", Map.of("*", Map.of()), "fragment_size", 2147483647)); + AbstractCalciteIndexScan.applyHighlightConfig(requestBuilder); + + HighlightBuilder highlighter = sourceBuilder.highlighter(); + assertEquals(1, highlighter.fields().size()); + assertEquals(Integer.valueOf(2147483647), highlighter.fields().get(0).fragmentSize()); + } + + @Test + void applyHighlightConfig_withMalformedConfig_handlesGracefully() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); + + // "fields" is a String instead of Map — should not throw NPE + CalcitePlanContext.setHighlightConfig(Map.of("fields", "not_a_map")); + AbstractCalciteIndexScan.applyHighlightConfig(requestBuilder); + + // Should still create a highlighter (just with no fields) + HighlightBuilder highlighter = sourceBuilder.highlighter(); + assertNotNull(highlighter); + assertEquals(0, highlighter.fields().size()); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java index 9fe3a4fef64..8e4731c0c3c 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java @@ -6,6 +6,8 @@ package org.opensearch.sql.ppl.domain; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import org.json.JSONObject; @@ -92,4 +94,33 @@ public void testGetFetchSizeWithLargeValue() { PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); assertEquals(15000, request.getFetchSize()); } + + @Test + public void testGetHighlightReturnsHighlightObject() { + JSONObject json = new JSONObject(); + json.put("query", "source=t \"error\""); + json.put( + "highlight", + new JSONObject( + "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}")); + PPLQueryRequest request = new PPLQueryRequest("source=t \"error\"", json, "/_plugins/_ppl"); + JSONObject highlight = request.getHighlight(); + assertNotNull(highlight); + assertTrue(highlight.has("fields")); + assertTrue(highlight.has("pre_tags")); + assertTrue(highlight.has("post_tags")); + } + + @Test + public void testGetHighlightReturnsNullWhenNotSpecified() { + JSONObject json = new JSONObject("{\"query\": \"source=t\"}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + assertNull(request.getHighlight()); + } + + @Test + public void testGetHighlightReturnsNullWhenJsonContentIsNull() { + PPLQueryRequest request = new PPLQueryRequest("source=t", null, "/_plugins/_ppl"); + assertNull(request.getHighlight()); + } } diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java index e5eb0f1ac77..25aa78a2c8c 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java @@ -6,7 +6,11 @@ package org.opensearch.sql.protocol.response.format; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_MISSING; +import static org.opensearch.sql.data.model.ExprValueUtils.collectionValue; +import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; import static org.opensearch.sql.data.model.ExprValueUtils.stringValue; import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; @@ -17,6 +21,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Arrays; +import java.util.Collections; import org.junit.jupiter.api.Test; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.executor.ExecutionEngine; @@ -178,4 +183,38 @@ void formatErrorPretty() { + "}", formatter.format(new RuntimeException("This is an exception"))); } + + @Test + void formatResponseWithHighlights() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "firstname", + stringValue("John"), + "age", + integerValue(20), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "firstname", + collectionValue(ImmutableList.of("John")))))))); + SimpleJsonResponseFormatter formatter = new SimpleJsonResponseFormatter(COMPACT); + String result = formatter.format(response); + assertTrue(result.contains("\"highlights\"")); + assertTrue(result.contains("\"firstname\":[\"John\"]")); + } + + @Test + void formatResponseWithoutHighlightsOmitsField() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList(tupleValue(ImmutableMap.of("firstname", "John", "age", 20)))); + SimpleJsonResponseFormatter formatter = new SimpleJsonResponseFormatter(COMPACT); + String result = formatter.format(response); + assertFalse(result.contains("\"highlights\"")); + } } From 40cee36297a0099b22d0e81f1e70b8446d0b9753 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Wed, 18 Feb 2026 16:10:14 -0800 Subject: [PATCH 04/18] style cleanup Signed-off-by: Jialiang Liang --- .../opensearch/sql/calcite/CalciteRelNodeVisitor.java | 1 - .../opensearch/sql/calcite/utils/PPLHintUtils.java | 2 ++ .../storage/scan/AbstractCalciteIndexScan.java | 11 +++-------- .../opensearch/storage/scan/context/PushDownType.java | 4 ++-- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index c6e0c5f2f25..84386dd0084 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -227,7 +227,6 @@ private RelBuilder scan(RelOptTable tableSchema, CalcitePlanContext context) { public RelNode visitSearch(Search node, CalcitePlanContext context) { // Visit the Relation child to get the scan node.getChild().get(0).accept(this, context); - // Create query_string function Function queryStringFunc = AstDSL.function( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java index 7b87778797f..915c45e7083 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java @@ -19,6 +19,7 @@ public class PPLHintUtils { private static final String HINT_AGG_ARGUMENTS = "AGG_ARGS"; private static final String KEY_IGNORE_NULL_BUCKET = "ignoreNullBucket"; private static final String KEY_HAS_NESTED_AGG_CALL = "hasNestedAggCall"; + private static final Supplier HINT_STRATEGY_TABLE = Suppliers.memoize( () -> @@ -28,6 +29,7 @@ public class PPLHintUtils { (hint, rel) -> { return rel instanceof LogicalAggregate; }) + // add more here .build()); /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 0a56cb85a20..e1fedb8bbc9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -175,7 +175,7 @@ public double estimateRowCount(RelMetadataQuery mq) { (rowCount, operation) -> switch (operation.type()) { case AGGREGATION -> mq.getRowCount((RelNode) operation.digest()); - case PROJECT, SORT, SORT_EXPR, HIGHLIGHT -> rowCount; + case PROJECT, SORT, SORT_EXPR -> rowCount; case SORT_AGG_METRICS -> NumberUtil.min(rowCount, osIndex.getQueryBucketSize().doubleValue()); // Refer the org.apache.calcite.rel.metadata.RelMdRowCount @@ -221,8 +221,8 @@ public double estimateRowCount(RelMetadataQuery mq) { dRows = mq.getRowCount((RelNode) operation.digest()); dCpu += dRows * getAggMultiplier(operation); } - // Ignored Project and Highlight in cost accumulation - case PROJECT, HIGHLIGHT -> {} + // Ignored Project in cost accumulation, but it will affect the external cost + case PROJECT -> {} case SORT -> dCpu += dRows; case SORT_AGG_METRICS -> { dRows = dRows * .9 / 10; // *.9 because always bucket IS_NOT_NULL @@ -311,11 +311,6 @@ public Map getAliasMapping() { return osIndex.getAliasMapping(); } - @Override - public RelNode withHints(List hintList) { - return buildScan(getCluster(), traitSet, hintList, table, osIndex, schema, pushDownContext); - } - public abstract AbstractCalciteIndexScan copy(); protected List getCollationNames(List collations) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java index d6817b8eeab..c763808164d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java @@ -15,7 +15,7 @@ public enum PushDownType { SCRIPT, // script in predicate SORT_AGG_METRICS, // convert composite aggregate to terms or multi-terms bucket aggregate RARE_TOP, // convert composite aggregate to nested aggregate - SORT_EXPR, - HIGHLIGHT + SORT_EXPR + // HIGHLIGHT, // NESTED } From fcf44a99e8033fa66c5ec19dc08f29596ff0ab06 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Wed, 18 Feb 2026 16:39:30 -0800 Subject: [PATCH 05/18] add another test case Signed-off-by: Jialiang Liang --- .../protocol/response/QueryResultTest.java | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java index f028da0f692..cfa0dd5a328 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java @@ -175,4 +175,25 @@ void highlights_returns_null_when_no_highlight_data() { assertEquals(1, highlights.size()); assertNull(highlights.get(0)); } + + @Test + void highlights_returns_null_when_highlight_is_missing() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("John"), + "age", + ExprValueUtils.integerValue(20), + "_highlight", + ExprValueUtils.LITERAL_MISSING))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertNull(highlights.get(0)); + } } From 71faa1f0f7bc4f10c8f2e6cbcd4c51b6b44bdc0b Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Wed, 18 Feb 2026 20:19:49 -0800 Subject: [PATCH 06/18] fix tests Signed-off-by: Jialiang Liang --- .../calcite/remote/CalcitePPLHighlightIT.java | 78 +++++++------------ 1 file changed, 30 insertions(+), 48 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java index 80eb2621939..05a6e9080ce 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java @@ -7,8 +7,8 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import java.io.IOException; import java.util.Locale; @@ -22,58 +22,29 @@ public class CalcitePPLHighlightIT extends PPLIntegTestCase { - private static final String TEST_INDEX = "highlight_test"; - @Override public void init() throws Exception { super.init(); enableCalcite(); - - // Create index with text fields - Request createIndex = new Request("PUT", "/" + TEST_INDEX); - createIndex.setJsonEntity( - "{" - + "\"settings\": {\"number_of_shards\": 1, \"number_of_replicas\": 0}," - + "\"mappings\": {\"properties\": {" - + "\"message\": {\"type\": \"text\"}," - + "\"status\": {\"type\": \"text\"}," - + "\"code\": {\"type\": \"integer\"}" - + "}}" - + "}"); - client().performRequest(createIndex); - - // Index test documents - Request bulk = new Request("POST", "/" + TEST_INDEX + "/_bulk?refresh=true"); - bulk.setJsonEntity( - "{\"index\": {}}\n" - + "{\"message\": \"Connection error occurred\", \"status\": \"error response\"," - + " \"code\": 500}\n" - + "{\"index\": {}}\n" - + "{\"message\": \"Request completed successfully\", \"status\": \"ok\", \"code\":" - + " 200}\n" - + "{\"index\": {}}\n" - + "{\"message\": \"Timeout error in service\", \"status\": \"error timeout\", \"code\":" - + " 504}\n"); - client().performRequest(bulk); + loadIndex(Index.ACCOUNT); } @Test public void testHighlightWithWildcardFields() throws IOException { JSONObject result = executeQueryWithHighlight( - "search source=" + TEST_INDEX + " \"error\"", + "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\"", "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}"); assertTrue(result.has("highlights")); JSONArray highlights = result.getJSONArray("highlights"); assertEquals(result.getInt("size"), highlights.length()); - // At least one highlight entry should contain "error" wrapped in tags boolean foundHighlight = false; for (int i = 0; i < highlights.length(); i++) { if (!highlights.isNull(i)) { String hlStr = highlights.get(i).toString(); - if (hlStr.contains("error") || hlStr.contains("Error")) { + if (hlStr.contains("Street")) { foundHighlight = true; break; } @@ -86,18 +57,18 @@ public void testHighlightWithWildcardFields() throws IOException { public void testHighlightWithSpecificField() throws IOException { JSONObject result = executeQueryWithHighlight( - "search source=" + TEST_INDEX + " \"error\"", - "{\"fields\": {\"message\": {}}, \"pre_tags\": [\"\"], \"post_tags\":" + "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\"", + "{\"fields\": {\"address\": {}}, \"pre_tags\": [\"\"], \"post_tags\":" + " [\"\"]}"); assertTrue(result.has("highlights")); JSONArray highlights = result.getJSONArray("highlights"); - // Check that highlights only contain "message" field, not "status" for (int i = 0; i < highlights.length(); i++) { if (!highlights.isNull(i)) { JSONObject hl = highlights.getJSONObject(i); - assertFalse("Should not highlight status field", hl.has("status")); + // Only address field should be highlighted, not other text fields + assertFalse("Should not highlight firstname field", hl.has("firstname")); } } } @@ -106,8 +77,9 @@ public void testHighlightWithSpecificField() throws IOException { public void testHighlightWithCustomTags() throws IOException { JSONObject result = executeQueryWithHighlight( - "search source=" + TEST_INDEX + " \"error\"", - "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}"); + "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\"", + "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\":" + + " [\"\"]}"); assertTrue(result.has("highlights")); JSONArray highlights = result.getJSONArray("highlights"); @@ -127,7 +99,8 @@ public void testHighlightWithCustomTags() throws IOException { @Test public void testNoHighlightWhenNotRequested() throws IOException { - JSONObject result = executeQuery("search source=" + TEST_INDEX + " \"error\""); + JSONObject result = + executeQueryNoHighlight("search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\""); assertFalse("Should not have highlights when not requested", result.has("highlights")); } @@ -136,15 +109,13 @@ public void testNoHighlightWhenNotRequested() throws IOException { public void testHighlightWithPipedFilter() throws IOException { JSONObject result = executeQueryWithHighlight( - "search source=" + TEST_INDEX + " \"error\" | where code > 500", + "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\" | where age > 30", "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}"); assertTrue(result.has("highlights")); - // Only the doc with code=504 should match - assertEquals(1, result.getInt("size")); + assertTrue(result.getInt("size") > 0); JSONArray highlights = result.getJSONArray("highlights"); - assertEquals(1, highlights.length()); - assertNotNull(highlights.get(0)); + assertEquals(result.getInt("size"), highlights.length()); } @Test @@ -153,10 +124,10 @@ public void testExplainWithHighlight() throws IOException { request.setJsonEntity( String.format( Locale.ROOT, - "{\"query\": \"search source=%s \\\"error\\\"\"," + "{\"query\": \"search source=%s \\\"Street\\\"\"," + "\"highlight\": {\"fields\": {\"*\": {}}," + "\"pre_tags\": [\"\"], \"post_tags\": [\"\"]}}", - TEST_INDEX)); + TEST_INDEX_ACCOUNT)); RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); restOptionsBuilder.addHeader("Content-Type", "application/json"); request.setOptions(restOptionsBuilder); @@ -164,7 +135,6 @@ public void testExplainWithHighlight() throws IOException { assertEquals(200, response.getStatusLine().getStatusCode()); String body = org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true); - // The explain output should contain the highlight clause assertTrue("Explain should contain highlight", body.contains("highlight")); } @@ -181,4 +151,16 @@ private JSONObject executeQueryWithHighlight(String query, String highlightJson) String body = org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true); return new JSONObject(body); } + + private JSONObject executeQueryNoHighlight(String query) throws IOException { + Request request = new Request("POST", "/_plugins/_ppl"); + request.setJsonEntity(String.format(Locale.ROOT, "{\"query\": \"%s\"}", query)); + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + Response response = client().performRequest(request); + assertEquals(200, response.getStatusLine().getStatusCode()); + String body = org.opensearch.sql.legacy.TestUtils.getResponseBody(response, true); + return new JSONObject(body); + } } From 1b7d1a4639f36b7149fde8b02221d69dade7e007 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 19 Feb 2026 16:33:25 -0800 Subject: [PATCH 07/18] peng - _highlight hiden column Signed-off-by: Jialiang Liang --- .../executor/OpenSearchExecutionEngine.java | 35 +++++++-------- .../storage/scan/CalciteLogicalIndexScan.java | 27 +++++++++++- .../scan/OpenSearchIndexEnumerator.java | 43 ++++++------------- .../sql/protocol/response/QueryResult.java | 2 +- 4 files changed, 54 insertions(+), 53 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index 7e215096af1..78da41f74a5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -63,7 +63,6 @@ import org.opensearch.sql.opensearch.executor.protector.ExecutionProtector; import org.opensearch.sql.opensearch.functions.DistinctCountApproxAggFunction; import org.opensearch.sql.opensearch.functions.GeoIpFunction; -import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexEnumerator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.storage.TableScanOperator; import org.opensearch.transport.client.node.NodeClient; @@ -212,7 +211,6 @@ public void execute( client.schedule( () -> { try (PreparedStatement statement = OpenSearchRelRunners.run(context, rel)) { - OpenSearchIndexEnumerator.clearCollectedHighlights(); ProfileMetric metric = QueryProfiling.current().getOrCreateMetric(MetricName.EXECUTE); long execTime = System.nanoTime(); ResultSet result = statement.executeQuery(); @@ -269,10 +267,19 @@ private QueryResponse buildResultSet( List values = new ArrayList<>(); // Iterate through the ResultSet while (resultSet.next() && (querySizeLimit == null || values.size() < querySizeLimit)) { - Map row = new LinkedHashMap(); + Map row = new LinkedHashMap<>(); // Loop through each column for (int i = 1; i <= columnCount; i++) { String columnName = metaData.getColumnName(i); + // _highlight flows through the Calcite pipeline as a hidden column (SqlTypeName.ANY). + // Extract it as an opaque ExprValue and embed it in the row tuple directly. + if (HighlightExpression.HIGHLIGHT_FIELD.equals(columnName)) { + Object value = resultSet.getObject(i); + if (value instanceof ExprValue hl && !hl.isMissing()) { + row.put(HighlightExpression.HIGHLIGHT_FIELD, hl); + } + continue; + } Object value = resultSet.getObject(columnName); Object converted = processValue(value); ExprValue exprValue = ExprValueUtils.fromObjectValue(converted); @@ -281,24 +288,13 @@ private QueryResponse buildResultSet( values.add(ExprTupleValue.fromExprValueMap(row)); } - // Merge highlight data collected by the enumerator back into ExprTupleValues. - // The Calcite row pipeline only carries schema column values, so highlight metadata - // is collected as a side channel in OpenSearchIndexEnumerator and merged here. - List collectedHighlights = - OpenSearchIndexEnumerator.getAndClearCollectedHighlights(); - for (int i = 0; i < Math.min(values.size(), collectedHighlights.size()); i++) { - ExprValue hl = collectedHighlights.get(i); - if (hl != null) { - Map rowWithHighlight = - new LinkedHashMap<>(ExprValueUtils.getTupleValue(values.get(i))); - rowWithHighlight.put(HighlightExpression.HIGHLIGHT_FIELD, hl); - values.set(i, ExprTupleValue.fromExprValueMap(rowWithHighlight)); - } - } - List columns = new ArrayList<>(metaData.getColumnCount()); for (int i = 1; i <= columnCount; ++i) { String columnName = metaData.getColumnName(i); + // Exclude _highlight from the response schema — it's a hidden column. + if (HighlightExpression.HIGHLIGHT_FIELD.equals(columnName)) { + continue; + } RelDataType fieldType = fieldTypes.get(i - 1); // TODO: Correct this after fixing issue github.com/opensearch-project/sql/issues/3751 // The element type of struct and array is currently set to ANY. @@ -317,8 +313,7 @@ private QueryResponse buildResultSet( columns.add(new Column(columnName, null, exprType)); } Schema schema = new Schema(columns); - QueryResponse response = new QueryResponse(schema, values, null); - return response; + return new QueryResponse(schema, values, null); } /** Registers opensearch-dependent functions */ diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index dbe8306d4b2..76de07a9093 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -5,6 +5,8 @@ package org.opensearch.sql.opensearch.storage.scan; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; + import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.List; @@ -35,11 +37,13 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.common.setting.Settings; @@ -78,10 +82,26 @@ public CalciteLogicalIndexScan( ImmutableList.of(), table, osIndex, - table.getRowType(), + buildInitialSchema(cluster, table), new PushDownContext(osIndex)); } + /** + * Build the initial schema for the scan, conditionally adding a hidden {@code _highlight} column + * when highlight configuration is present. This allows highlight data to flow through all Calcite + * operators (filter, sort, dedup) as a regular column, avoiding positional misalignment issues. + */ + private static RelDataType buildInitialSchema(RelOptCluster cluster, RelOptTable table) { + RelDataType base = table.getRowType(); + if (CalcitePlanContext.getHighlightConfig() == null) { + return base; + } + RelDataTypeFactory.Builder builder = cluster.getTypeFactory().builder(); + builder.addAll(base.getFieldList()); + builder.add(HIGHLIGHT_FIELD, SqlTypeName.ANY); + return builder.build(); + } + protected CalciteLogicalIndexScan( RelOptCluster cluster, RelTraitSet traitSet, @@ -276,10 +296,13 @@ public CalciteLogicalIndexScan pushDownProject(List selectedColumns) { // For aggregate, we do nothing on query builder but only change the schema of the scan. action = (AggregationBuilderAction) aggAction -> {}; } else { + // Filter _highlight from _source includes — it's not a source field in OpenSearch. action = (OSRequestBuilderAction) requestBuilder -> - requestBuilder.pushDownProjectStream(newSchema.getFieldNames().stream()); + requestBuilder.pushDownProjectStream( + newSchema.getFieldNames().stream() + .filter(name -> !HIGHLIGHT_FIELD.equals(name))); } newScan.pushDownContext.add( PushDownType.PROJECT, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java index e4d86ce0fad..d8bcfe6b280 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java @@ -7,7 +7,6 @@ import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; -import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -31,27 +30,6 @@ */ public class OpenSearchIndexEnumerator implements Enumerator { - /** - * Thread-local collector for highlight data. Since the Calcite row pipeline only carries schema - * column values, highlight metadata from OpenSearch hits is collected here as a side channel. - * After execution, {@link #getAndClearCollectedHighlights()} retrieves the collected data so it - * can be merged back into the ExprTupleValues for the JDBC response. - */ - private static final ThreadLocal> COLLECTED_HIGHLIGHTS = - ThreadLocal.withInitial(ArrayList::new); - - /** Retrieve collected highlights and clear the ThreadLocal. */ - public static List getAndClearCollectedHighlights() { - List result = new ArrayList<>(COLLECTED_HIGHLIGHTS.get()); - COLLECTED_HIGHLIGHTS.get().clear(); - return result; - } - - /** Clear collected highlights (call before starting a new execution). */ - public static void clearCollectedHighlights() { - COLLECTED_HIGHLIGHTS.get().clear(); - } - /** OpenSearch client. */ private final OpenSearchClient client; @@ -113,7 +91,19 @@ public Object current() { if (fields.size() == 1) { return resolveForCalcite(current, fields.getFirst()); } - return fields.stream().map(field -> resolveForCalcite(current, field)).toArray(); + return fields.stream() + .map( + field -> { + // _highlight is carried as an opaque ExprValue through the Calcite row pipeline. + // It uses SqlTypeName.ANY so Calcite passes it through without conversion. + if (HIGHLIGHT_FIELD.equals(field)) { + Map tuple = ExprValueUtils.getTupleValue(current); + ExprValue hl = tuple.get(HIGHLIGHT_FIELD); + return (hl != null && !hl.isMissing()) ? (Object) hl : null; + } + return resolveForCalcite(current, field); + }) + .toArray(); } private Object resolveForCalcite(ExprValue value, String rawPath) { @@ -136,12 +126,6 @@ public boolean moveNext() { } if (iterator.hasNext()) { current = iterator.next(); - // Collect highlight data as a side channel for the JDBC response. - // The Calcite row (from current()) only carries schema column values, - // so _highlight must be preserved separately. - Map tuple = ExprValueUtils.getTupleValue(current); - ExprValue hl = tuple.get(HIGHLIGHT_FIELD); - COLLECTED_HIGHLIGHTS.get().add(hl != null && !hl.isMissing() ? hl : null); queryCount++; return true; } else { @@ -154,7 +138,6 @@ public void reset() { bgScanner.reset(request); iterator = bgScanner.fetchNextBatch(request).iterator(); queryCount = 0; - COLLECTED_HIGHLIGHTS.get().clear(); } @Override diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java index 9c8119a49dd..a026bddf46b 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java @@ -111,7 +111,7 @@ public List> highlights() { .map( tuple -> { ExprValue hl = tuple.get(HIGHLIGHT_FIELD); - if (hl == null || hl.isMissing()) { + if (hl == null || hl.isMissing() || hl.isNull()) { return null; } Map hlMap = new LinkedHashMap<>(); From 0283f28e2457dbd0971d8b5489f472b2bcbe17ae Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 19 Feb 2026 17:03:46 -0800 Subject: [PATCH 08/18] peng - add doc Signed-off-by: Jialiang Liang --- docs/user/ppl/interfaces/endpoint.md | 110 ++++++++++++++++++++++++++- 1 file changed, 109 insertions(+), 1 deletion(-) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index d5958ba3250..7fb585ada3b 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -53,7 +53,115 @@ Expected output: } ``` -## Explain +## Highlight + +### Description + +You can include an optional `highlight` object in the request body to request search result highlighting. When a PPL query contains a full-text search (e.g., `search source=logs "error"`), OpenSearch identifies where the search terms appear in document fields and returns the matching fragments wrapped in the specified tags. + +The `highlight` object supports the following parameters: + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `fields` | Object | Yes | Map of field names to per-field configuration. Use `"*"` to highlight all `text` and `keyword` fields that match. | +| `pre_tags` | Array of strings | No | Tags inserted before each highlighted term. Default: `[""]`. | +| `post_tags` | Array of strings | No | Tags inserted after each highlighted term. Default: `[""]`. | +| `fragment_size` | Integer | No | Maximum character length of each highlight fragment. Default is OpenSearch's default (100). Set to `2147483647` to return the entire field value without truncation. | + +### What gets highlighted + +- **Full-text search terms only.** The search term in `search source=logs "error"` is translated to a `query_string` query, and OpenSearch's highlighter identifies matches. Structured filters (`where`, `stats`, comparison operators) do not produce highlights. +- **`text` and `keyword` fields only.** Numeric, date, boolean, and other non-string field types never produce highlight fragments. +- **Wildcard field matching.** `"*": {}` matches all eligible fields, including `.keyword` subfields. + +### Example 1: Wildcard highlight + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl \ +-d '{ + "query": "search source=accounts \"Holmes\"", + "highlight": { + "fields": { "*": {} }, + "pre_tags": [""], + "post_tags": [""], + "fragment_size": 2147483647 + } +}' +``` + +Expected output: + +```json +{ + "schema": [ + { "name": "firstname", "type": "string" }, + { "name": "lastname", "type": "string" }, + { "name": "address", "type": "string" } + ], + "datarows": [ + ["Holmes", "Morgan", "123 Main St"], + ["Jane", "Holmes", "456 Oak Ave"], + ["John", "Smith", "880 Holmes Lane"] + ], + "highlights": [ + { "firstname": ["Holmes"], "firstname.keyword": ["Holmes"] }, + { "lastname": ["Holmes"], "lastname.keyword": ["Holmes"] }, + { "address": ["880 Holmes Lane"] } + ], + "total": 3, + "size": 3, + "status": 200 +} +``` + +### Example 2: Specific field with custom tags + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl \ +-d '{ + "query": "search source=accounts \"Holmes\"", + "highlight": { + "fields": { "address": {} }, + "pre_tags": [""], + "post_tags": [""] + } +}' +``` + +Expected output: + +```json +{ + "schema": [ ... ], + "datarows": [ ... ], + "highlights": [ + null, + null, + { "address": ["880 Holmes Lane"] } + ], + "total": 3, + "size": 3, + "status": 200 +} +``` + +Only the `address` field is highlighted. Rows where "Holmes" appears in other fields have `null` highlight entries. + +### Response format + +- The `highlights` array is parallel to `datarows` — each entry corresponds to the row at the same index. +- Entries are `null` when a row has no highlight data for the requested fields. +- The `highlights` array is **omitted entirely** when no `highlight` config is provided in the request (backward compatible). + +### Notes + +- Highlighting is supported only in the Calcite engine. +- The backend forwards the highlight config as-is to OpenSearch. The same highlighting behavior and limitations as [OpenSearch's highlighting API](https://opensearch.org/docs/latest/search-plugins/searching-data/highlight/) apply. +- Piped commands (`where`, `sort`, `head`, `dedup`) narrow or reorder the result set but do not affect which terms are highlighted. + +## Explain ### Description From 97a841851cc19c0c185fb76f29df946520a904b8 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 19 Feb 2026 20:10:37 -0800 Subject: [PATCH 09/18] peng - hiden col solution 2 Signed-off-by: Jialiang Liang --- .../sql/calcite/CalciteRelNodeVisitor.java | 10 ++++++++ docs/user/ppl/interfaces/endpoint.md | 3 ++- .../calcite/remote/CalcitePPLHighlightIT.java | 24 ++++++++++++------- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 84386dd0084..f8ef0247c60 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -23,6 +23,7 @@ import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation; import static org.opensearch.sql.calcite.utils.PlanUtils.getRexCall; import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME; import com.google.common.base.Strings; @@ -407,6 +408,15 @@ public RelNode visitProject(Project node, CalcitePlanContext context) { if (!context.isResolvingSubquery()) { context.setProjectVisited(true); } + // When highlight is active, include _highlight in the projection so it survives + // through the Calcite pipeline. This matches DSL behavior where _source filtering + // does not affect highlights. + if (CalcitePlanContext.getHighlightConfig() != null) { + int hlIndex = currentFields.indexOf(HIGHLIGHT_FIELD); + if (hlIndex >= 0) { + expandedFields.add(context.relBuilder.field(hlIndex)); + } + } context.relBuilder.project(expandedFields); } return context.relBuilder.peek(); diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index 7fb585ada3b..41f60d0ce74 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -155,11 +155,12 @@ Only the `address` field is highlighted. Rows where "Holmes" appears in other fi - Entries are `null` when a row has no highlight data for the requested fields. - The `highlights` array is **omitted entirely** when no `highlight` config is provided in the request (backward compatible). -### Notes +### Limitations - Highlighting is supported only in the Calcite engine. - The backend forwards the highlight config as-is to OpenSearch. The same highlighting behavior and limitations as [OpenSearch's highlighting API](https://opensearch.org/docs/latest/search-plugins/searching-data/highlight/) apply. - Piped commands (`where`, `sort`, `head`, `dedup`) narrow or reorder the result set but do not affect which terms are highlighted. +- Highlighting works with **single-source queries only**. Joins (`| join`), subqueries, and multi-source queries are not supported — the `highlight` config is ignored in these cases. ## Explain diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java index 05a6e9080ce..8e76b8c8ce4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java @@ -43,11 +43,15 @@ public void testHighlightWithWildcardFields() throws IOException { boolean foundHighlight = false; for (int i = 0; i < highlights.length(); i++) { if (!highlights.isNull(i)) { - String hlStr = highlights.get(i).toString(); - if (hlStr.contains("Street")) { - foundHighlight = true; - break; + JSONObject hl = highlights.getJSONObject(i); + for (String key : hl.keySet()) { + String fragment = hl.getJSONArray(key).getString(0); + if (fragment.contains("") && fragment.contains("Street")) { + foundHighlight = true; + break; + } } + if (foundHighlight) break; } } assertTrue("Expected at least one highlight with tags", foundHighlight); @@ -87,11 +91,15 @@ public void testHighlightWithCustomTags() throws IOException { boolean foundCustomTag = false; for (int i = 0; i < highlights.length(); i++) { if (!highlights.isNull(i)) { - String hlStr = highlights.get(i).toString(); - if (hlStr.contains("")) { - foundCustomTag = true; - break; + JSONObject hl = highlights.getJSONObject(i); + for (String key : hl.keySet()) { + String fragment = hl.getJSONArray(key).getString(0); + if (fragment.contains("")) { + foundCustomTag = true; + break; + } } + if (foundCustomTag) break; } } assertTrue("Expected custom tags in highlights", foundCustomTag); From cc6101a2fdf422bbe60cc122a8fe39dec0159c34 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 20 Feb 2026 03:00:53 -0800 Subject: [PATCH 10/18] update doc Signed-off-by: Jialiang Liang --- docs/user/ppl/interfaces/endpoint.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index 41f60d0ce74..9a7c59449ac 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -160,7 +160,7 @@ Only the `address` field is highlighted. Rows where "Holmes" appears in other fi - Highlighting is supported only in the Calcite engine. - The backend forwards the highlight config as-is to OpenSearch. The same highlighting behavior and limitations as [OpenSearch's highlighting API](https://opensearch.org/docs/latest/search-plugins/searching-data/highlight/) apply. - Piped commands (`where`, `sort`, `head`, `dedup`) narrow or reorder the result set but do not affect which terms are highlighted. -- Highlighting works with **single-source queries only**. Joins (`| join`), subqueries, and multi-source queries are not supported — the `highlight` config is ignored in these cases. +- Highlighting works with **single-source queries only**, consistent with DSL where highlighting is inherently single-index per request. Behavior with joins (`| join`), subqueries, and multi-source queries is not validated. ## Explain From 583b870c2cad70cc74fc1c9bec6471a087c111dd Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 20 Feb 2026 09:34:58 -0800 Subject: [PATCH 11/18] update docs Signed-off-by: Jialiang Liang --- docs/user/ppl/interfaces/endpoint.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index 9a7c59449ac..b0f38f872c6 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -72,7 +72,6 @@ The `highlight` object supports the following parameters: - **Full-text search terms only.** The search term in `search source=logs "error"` is translated to a `query_string` query, and OpenSearch's highlighter identifies matches. Structured filters (`where`, `stats`, comparison operators) do not produce highlights. - **`text` and `keyword` fields only.** Numeric, date, boolean, and other non-string field types never produce highlight fragments. -- **Wildcard field matching.** `"*": {}` matches all eligible fields, including `.keyword` subfields. ### Example 1: Wildcard highlight @@ -153,13 +152,17 @@ Only the `address` field is highlighted. Rows where "Holmes" appears in other fi - The `highlights` array is parallel to `datarows` — each entry corresponds to the row at the same index. - Entries are `null` when a row has no highlight data for the requested fields. -- The `highlights` array is **omitted entirely** when no `highlight` config is provided in the request (backward compatible). +- The `highlights` array is **omitted entirely** when no `highlight` config is provided in the request (backward compatible). When no highlight config is provided, there is zero impact on query processing — no extra columns, no extra overhead. + +### Behavior with piped commands + +- Piped commands (`where`, `sort`, `head`, `dedup`) narrow or reorder the result set but do not affect which terms are highlighted. Only the full-text search term produces highlights. +- Explicit field projection (`| fields name, age`) preserves highlight data, consistent with DSL where `_source` filtering does not affect highlights. ### Limitations - Highlighting is supported only in the Calcite engine. - The backend forwards the highlight config as-is to OpenSearch. The same highlighting behavior and limitations as [OpenSearch's highlighting API](https://opensearch.org/docs/latest/search-plugins/searching-data/highlight/) apply. -- Piped commands (`where`, `sort`, `head`, `dedup`) narrow or reorder the result set but do not affect which terms are highlighted. - Highlighting works with **single-source queries only**, consistent with DSL where highlighting is inherently single-index per request. Behavior with joins (`| join`), subqueries, and multi-source queries is not validated. ## Explain From b95f3336ce28efd90e2bd98c142ca8a0e97007a7 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 23 Feb 2026 11:53:10 -0800 Subject: [PATCH 12/18] enhance highlight IT Signed-off-by: Jialiang Liang --- .../sql/calcite/remote/CalcitePPLHighlightIT.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java index 8e76b8c8ce4..e74c0c9e96c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java @@ -68,13 +68,21 @@ public void testHighlightWithSpecificField() throws IOException { assertTrue(result.has("highlights")); JSONArray highlights = result.getJSONArray("highlights"); + boolean foundAddressHighlight = false; for (int i = 0; i < highlights.length(); i++) { if (!highlights.isNull(i)) { JSONObject hl = highlights.getJSONObject(i); // Only address field should be highlighted, not other text fields assertFalse("Should not highlight firstname field", hl.has("firstname")); + if (hl.has("address")) { + String fragment = hl.getJSONArray("address").getString(0); + assertTrue("address highlight should contain tags", fragment.contains("")); + assertTrue("address highlight should contain Street", fragment.contains("Street")); + foundAddressHighlight = true; + } } } + assertTrue("Expected at least one row with address highlighted", foundAddressHighlight); } @Test From ddd292b93225f834ba769e8bca1d2b93bc801066 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 23 Feb 2026 16:27:10 -0800 Subject: [PATCH 13/18] fix tests Signed-off-by: Jialiang Liang --- .../sql/calcite/remote/CalcitePPLHighlightIT.java | 12 ++++++------ .../format/SimpleJsonResponseFormatterTest.java | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java index e74c0c9e96c..617123c45ba 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLHighlightIT.java @@ -30,7 +30,7 @@ public void init() throws Exception { } @Test - public void testHighlightWithWildcardFields() throws IOException { + public void testHighlightWildcardFieldsPresent() throws IOException { JSONObject result = executeQueryWithHighlight( "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\"", @@ -58,7 +58,7 @@ public void testHighlightWithWildcardFields() throws IOException { } @Test - public void testHighlightWithSpecificField() throws IOException { + public void testHighlightSpecificFieldOnly() throws IOException { JSONObject result = executeQueryWithHighlight( "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\"", @@ -86,7 +86,7 @@ public void testHighlightWithSpecificField() throws IOException { } @Test - public void testHighlightWithCustomTags() throws IOException { + public void testHighlightCustomTagsApplied() throws IOException { JSONObject result = executeQueryWithHighlight( "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\"", @@ -114,7 +114,7 @@ public void testHighlightWithCustomTags() throws IOException { } @Test - public void testNoHighlightWhenNotRequested() throws IOException { + public void testHighlightOmittedWhenNotRequested() throws IOException { JSONObject result = executeQueryNoHighlight("search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\""); @@ -122,7 +122,7 @@ public void testNoHighlightWhenNotRequested() throws IOException { } @Test - public void testHighlightWithPipedFilter() throws IOException { + public void testHighlightAlignedAfterPipedFilter() throws IOException { JSONObject result = executeQueryWithHighlight( "search source=" + TEST_INDEX_ACCOUNT + " \\\"Street\\\" | where age > 30", @@ -135,7 +135,7 @@ public void testHighlightWithPipedFilter() throws IOException { } @Test - public void testExplainWithHighlight() throws IOException { + public void testExplainContainsHighlightClause() throws IOException { Request request = new Request("POST", "/_plugins/_ppl/_explain"); request.setJsonEntity( String.format( diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java index 25aa78a2c8c..b89fb878f21 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java @@ -185,7 +185,7 @@ void formatErrorPretty() { } @Test - void formatResponseWithHighlights() { + void testFormatResponseHighlightsPresent() { QueryResult response = new QueryResult( schema, @@ -208,7 +208,7 @@ void formatResponseWithHighlights() { } @Test - void formatResponseWithoutHighlightsOmitsField() { + void testFormatResponseHighlightsOmittedWhenAbsent() { QueryResult response = new QueryResult( schema, From 12672d676aa669d40509e5d2f74fd38f2eb6d0b9 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 23 Feb 2026 16:37:23 -0800 Subject: [PATCH 14/18] coderabbit - enhancement of tests Signed-off-by: Jialiang Liang --- .../protocol/response/QueryResultTest.java | 83 ++++++++++++++++++- 1 file changed, 79 insertions(+), 4 deletions(-) diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java index cfa0dd5a328..20e83d2f4a3 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java @@ -113,7 +113,7 @@ void iterate() { } @Test - void iterate_excludes_highlight_from_datarows() { + void testIterateExcludesHighlightFromDatarows() { QueryResult response = new QueryResult( schema, @@ -139,7 +139,7 @@ void iterate_excludes_highlight_from_datarows() { } @Test - void highlights_returns_highlight_data() { + void testHighlightsReturnsSingleRowData() { QueryResult response = new QueryResult( schema, @@ -164,7 +164,7 @@ void highlights_returns_highlight_data() { } @Test - void highlights_returns_null_when_no_highlight_data() { + void testHighlightsReturnsNullWhenNoHighlightField() { QueryResult response = new QueryResult( schema, @@ -177,7 +177,7 @@ void highlights_returns_null_when_no_highlight_data() { } @Test - void highlights_returns_null_when_highlight_is_missing() { + void testHighlightsReturnsNullWhenHighlightIsMissing() { QueryResult response = new QueryResult( schema, @@ -196,4 +196,79 @@ void highlights_returns_null_when_highlight_is_missing() { assertEquals(1, highlights.size()); assertNull(highlights.get(0)); } + + @Test + void testHighlightsReturnsEmptyListWhenNoRows() { + QueryResult response = new QueryResult(schema, Collections.emptyList(), Cursor.None); + + List> highlights = response.highlights(); + assertEquals(0, highlights.size()); + } + + @Test + void testHighlightsReturnsNullWhenHighlightIsNull() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList( + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("John"), + "age", + ExprValueUtils.integerValue(20), + "_highlight", + ExprValueUtils.LITERAL_NULL))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertNull(highlights.get(0)); + } + + @Test + void testHighlightsMultiRowAlignment() { + QueryResult response = + new QueryResult( + schema, + Arrays.asList( + // Row 0: has highlight on name + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("John"), + "age", + ExprValueUtils.integerValue(20), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.collectionValue( + ImmutableList.of("John")))))), + // Row 1: no highlight + tupleValue(ImmutableMap.of("name", "Allen", "age", 30)), + // Row 2: has highlight on age (as string field) + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.stringValue("Smith"), + "age", + ExprValueUtils.integerValue(40), + "_highlight", + ExprTupleValue.fromExprValueMap( + ImmutableMap.of( + "name", + ExprValueUtils.collectionValue( + ImmutableList.of("Smith"))))))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(3, highlights.size()); + // Row 0: highlight present + assertEquals(ImmutableMap.of("name", ImmutableList.of("John")), highlights.get(0)); + // Row 1: no highlight → null + assertNull(highlights.get(1)); + // Row 2: highlight present + assertEquals(ImmutableMap.of("name", ImmutableList.of("Smith")), highlights.get(2)); + } } From 063f7d085fc32c04f753ea11bc299b49e60579c8 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 23 Feb 2026 18:31:04 -0800 Subject: [PATCH 15/18] fix doc tests Signed-off-by: Jialiang Liang --- docs/user/ppl/interfaces/endpoint.md | 86 ++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 23 deletions(-) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index b0f38f872c6..f940f3d2da9 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -79,7 +79,7 @@ The `highlight` object supports the following parameters: curl -sS -H 'Content-Type: application/json' \ -X POST localhost:9200/_plugins/_ppl \ -d '{ - "query": "search source=accounts \"Holmes\"", + "query": "search source=accounts \\\"Street\\\" | fields firstname, address", "highlight": { "fields": { "*": {} }, "pre_tags": [""], @@ -94,23 +94,39 @@ Expected output: ```json { "schema": [ - { "name": "firstname", "type": "string" }, - { "name": "lastname", "type": "string" }, - { "name": "address", "type": "string" } + { + "name": "firstname", + "type": "string" + }, + { + "name": "address", + "type": "string" + } ], "datarows": [ - ["Holmes", "Morgan", "123 Main St"], - ["Jane", "Holmes", "456 Oak Ave"], - ["John", "Smith", "880 Holmes Lane"] + [ + "Hattie", + "671 Bristol Street" + ], + [ + "Nanette", + "789 Madison Street" + ] ], "highlights": [ - { "firstname": ["Holmes"], "firstname.keyword": ["Holmes"] }, - { "lastname": ["Holmes"], "lastname.keyword": ["Holmes"] }, - { "address": ["880 Holmes Lane"] } + { + "address": [ + "671 Bristol Street" + ] + }, + { + "address": [ + "789 Madison Street" + ] + } ], - "total": 3, - "size": 3, - "status": 200 + "total": 2, + "size": 2 } ``` @@ -120,7 +136,7 @@ Expected output: curl -sS -H 'Content-Type: application/json' \ -X POST localhost:9200/_plugins/_ppl \ -d '{ - "query": "search source=accounts \"Holmes\"", + "query": "search source=accounts \\\"Street\\\" | fields firstname, address", "highlight": { "fields": { "address": {} }, "pre_tags": [""], @@ -133,20 +149,44 @@ Expected output: ```json { - "schema": [ ... ], - "datarows": [ ... ], + "schema": [ + { + "name": "firstname", + "type": "string" + }, + { + "name": "address", + "type": "string" + } + ], + "datarows": [ + [ + "Hattie", + "671 Bristol Street" + ], + [ + "Nanette", + "789 Madison Street" + ] + ], "highlights": [ - null, - null, - { "address": ["880 Holmes Lane"] } + { + "address": [ + "671 Bristol Street" + ] + }, + { + "address": [ + "789 Madison Street" + ] + } ], - "total": 3, - "size": 3, - "status": 200 + "total": 2, + "size": 2 } ``` -Only the `address` field is highlighted. Rows where "Holmes" appears in other fields have `null` highlight entries. +Only the `address` field is highlighted. The `` custom tags are used instead of the default `` tags. ### Response format From 798396c16202297071417121791a6c12cf9012da Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Wed, 25 Feb 2026 18:20:56 -0800 Subject: [PATCH 16/18] peng - fix tests with more assertions Signed-off-by: Jialiang Liang --- .../rest-api-spec/test/ppl_highlight.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml index e6584c971e0..63d102fbfae 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl_highlight.yml @@ -50,7 +50,7 @@ teardown: Content-Type: 'application/json' ppl: body: - query: 'search source=ppl_highlight_test "error"' + query: 'search source=ppl_highlight_test "error" | sort code' highlight: fields: "*": {} @@ -60,7 +60,11 @@ teardown: - "" - match: {"size": 2} - - is_true: highlights + - length: {highlights: 2} + - match: {highlights.0.message.0: "Connection error occurred"} + - match: {highlights.0.status.0: "error response"} + - match: {highlights.1.message.0: "Timeout error in service"} + - match: {highlights.1.status.0: "error timeout"} --- "PPL search without highlight is backward compatible": @@ -87,7 +91,7 @@ teardown: Content-Type: 'application/json' ppl: body: - query: 'search source=ppl_highlight_test "error"' + query: 'search source=ppl_highlight_test "error" | sort code' highlight: fields: "*": {} @@ -97,4 +101,8 @@ teardown: - "" - match: {"size": 2} - - is_true: highlights + - length: {highlights: 2} + - match: {highlights.0.message.0: "Connection error occurred"} + - match: {highlights.0.status.0: "error response"} + - match: {highlights.1.message.0: "Timeout error in service"} + - match: {highlights.1.status.0: "error timeout"} From 5bda888a89e9658b52b11627d61dcdf25e61725d Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Wed, 25 Feb 2026 20:29:48 -0800 Subject: [PATCH 17/18] Generalize highlight plumbing to generic DSL merge mechanism Signed-off-by: Jialiang Liang --- .../sql/calcite/CalcitePlanContext.java | 28 +++++---- .../sql/calcite/CalciteRelNodeVisitor.java | 2 +- .../sql/executor/execution/AbstractPlan.java | 10 +-- .../sql/executor/execution/ExplainPlan.java | 13 ++-- .../sql/executor/execution/QueryPlan.java | 13 ++-- .../executor/execution/ExplainPlanTest.java | 21 +++---- .../sql/executor/execution/QueryPlanTest.java | 21 +++---- .../scan/AbstractCalciteIndexScan.java | 62 +++++++++--------- .../scan/CalciteEnumerableIndexScan.java | 2 +- .../storage/scan/CalciteLogicalIndexScan.java | 2 +- ...alciteIndexScanExtraSearchSourceTest.java} | 63 +++++++++---------- .../org/opensearch/sql/ppl/PPLService.java | 17 +++-- .../sql/ppl/domain/PPLQueryRequest.java | 17 +++-- .../sql/ppl/domain/PPLQueryRequestTest.java | 22 ++++--- 14 files changed, 147 insertions(+), 146 deletions(-) rename opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/{AbstractCalciteIndexScanHighlightTest.java => AbstractCalciteIndexScanExtraSearchSourceTest.java} (54%) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index adf02cee275..462f09c6c90 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -46,23 +46,29 @@ public class CalcitePlanContext { ThreadLocal.withInitial(() -> true); /** - * Thread-local highlight configuration from the PPL request body. Set by PPLService before query - * execution and read by CalciteEnumerableIndexScan when building the OpenSearch request. The map - * represents the highlight JSON object (fields, pre_tags, post_tags, fragment_size) that the - * caller provides and the backend forwards as-is to OpenSearch. + * Thread-local extra search-source JSON from the PPL request body. Set by PPLService before query + * execution and read by AbstractCalciteIndexScan when building the OpenSearch request. The JSON + * string is parsed via {@code SearchSourceBuilder.fromXContent()} and selectively merged into the + * index scan request. */ - private static final ThreadLocal> highlightConfig = new ThreadLocal<>(); + private static final ThreadLocal extraSearchSource = new ThreadLocal<>(); - public static void setHighlightConfig(Map config) { - highlightConfig.set(config); + public static void setExtraSearchSource(String json) { + extraSearchSource.set(json); } - public static Map getHighlightConfig() { - return highlightConfig.get(); + public static String getExtraSearchSource() { + return extraSearchSource.get(); } - public static void clearHighlightConfig() { - highlightConfig.remove(); + public static void clearExtraSearchSource() { + extraSearchSource.remove(); + } + + /** Convenience check: does the extra search source contain a highlight clause? */ + public static boolean hasHighlightInExtraSearchSource() { + String extra = extraSearchSource.get(); + return extra != null && extra.contains("\"highlight\""); } @Getter @Setter private boolean isResolvingJoinCondition = false; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index f8ef0247c60..d59442150f4 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -411,7 +411,7 @@ public RelNode visitProject(Project node, CalcitePlanContext context) { // When highlight is active, include _highlight in the projection so it survives // through the Calcite pipeline. This matches DSL behavior where _source filtering // does not affect highlights. - if (CalcitePlanContext.getHighlightConfig() != null) { + if (CalcitePlanContext.hasHighlightInExtraSearchSource()) { int hlIndex = currentFields.indexOf(HIGHLIGHT_FIELD); if (hlIndex >= 0) { expandedFields.add(context.relBuilder.field(hlIndex)); diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java b/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java index f9e3c99dc88..35e922cb647 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/AbstractPlan.java @@ -5,7 +5,6 @@ package org.opensearch.sql.executor.execution; -import java.util.Map; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.Setter; @@ -25,12 +24,13 @@ public abstract class AbstractPlan { @Getter protected final QueryType queryType; /** - * Highlight configuration from the PPL request body. Set by PPLService before submitting the plan - * to the query manager. The plan carries this config across the thread boundary (REST handler + * Extra search-source JSON from the PPL request body. Set by PPLService before submitting the + * plan to the query manager. The plan carries this across the thread boundary (REST handler * thread → sql-worker thread), and the worker thread sets it as a ThreadLocal before Calcite - * planning and execution begin. + * planning and execution begin. The JSON is later parsed via {@code + * SearchSourceBuilder.fromXContent()} and selectively merged into the index scan request. */ - @Getter @Setter private Map highlightConfig; + @Getter @Setter private String extraSearchSource; /** Start query execution. */ public abstract void execute(); diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java b/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java index aeab7181ee9..95ac07ae8cc 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/ExplainPlan.java @@ -5,7 +5,6 @@ package org.opensearch.sql.executor.execution; -import java.util.Map; import org.opensearch.sql.ast.statement.ExplainMode; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.common.response.ResponseListener; @@ -36,18 +35,18 @@ public ExplainPlan( @Override public void execute() { - setHighlightThreadLocal(); + setExtraSearchSourceThreadLocal(); try { plan.explain(explainListener, mode); } finally { - CalcitePlanContext.clearHighlightConfig(); + CalcitePlanContext.clearExtraSearchSource(); } } - private void setHighlightThreadLocal() { - Map config = getHighlightConfig(); - if (config != null) { - CalcitePlanContext.setHighlightConfig(config); + private void setExtraSearchSourceThreadLocal() { + String extra = getExtraSearchSource(); + if (extra != null) { + CalcitePlanContext.setExtraSearchSource(extra); } } diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java b/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java index e5f4e1eb868..4d1667df57a 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java @@ -5,7 +5,6 @@ package org.opensearch.sql.executor.execution; -import java.util.Map; import java.util.Optional; import org.apache.commons.lang3.NotImplementedException; import org.opensearch.sql.ast.statement.ExplainMode; @@ -62,7 +61,7 @@ public QueryPlan( @Override public void execute() { - setHighlightThreadLocal(); + setExtraSearchSourceThreadLocal(); try { if (pageSize.isPresent()) { queryService.execute(new Paginate(pageSize.get(), plan), getQueryType(), listener); @@ -70,7 +69,7 @@ public void execute() { queryService.execute(plan, getQueryType(), listener); } } finally { - CalcitePlanContext.clearHighlightConfig(); + CalcitePlanContext.clearExtraSearchSource(); } } @@ -86,10 +85,10 @@ public void explain( } } - private void setHighlightThreadLocal() { - Map config = getHighlightConfig(); - if (config != null) { - CalcitePlanContext.setHighlightConfig(config); + private void setExtraSearchSourceThreadLocal() { + String extra = getExtraSearchSource(); + if (extra != null) { + CalcitePlanContext.setExtraSearchSource(extra); } } } diff --git a/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java b/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java index e170fe2a522..bba5217051e 100644 --- a/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/execution/ExplainPlanTest.java @@ -14,7 +14,6 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -import java.util.Map; import java.util.concurrent.atomic.AtomicReference; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -63,34 +62,34 @@ public void explainThrowException() { } @Test - public void execute_sets_highlight_threadlocal_for_explain() { - Map highlightConfig = Map.of("fields", Map.of("*", Map.of())); - AtomicReference> captured = new AtomicReference<>(); + public void execute_sets_extra_search_source_threadlocal_for_explain() { + String extraSearchSource = "{\"highlight\":{\"fields\":{\"*\":{}}}}"; + AtomicReference captured = new AtomicReference<>(); doAnswer( invocation -> { - captured.set(CalcitePlanContext.getHighlightConfig()); + captured.set(CalcitePlanContext.getExtraSearchSource()); return null; }) .when(queryPlan) .explain(any(), any()); ExplainPlan explainPlan = new ExplainPlan(queryId, queryType, queryPlan, mode, explainListener); - explainPlan.setHighlightConfig(highlightConfig); + explainPlan.setExtraSearchSource(extraSearchSource); explainPlan.execute(); - assertEquals(highlightConfig, captured.get()); + assertEquals(extraSearchSource, captured.get()); } @Test - public void execute_clears_highlight_threadlocal_after_explain() { - Map highlightConfig = Map.of("fields", Map.of("*", Map.of())); + public void execute_clears_extra_search_source_threadlocal_after_explain() { + String extraSearchSource = "{\"highlight\":{\"fields\":{\"*\":{}}}}"; doNothing().when(queryPlan).explain(any(), any()); ExplainPlan explainPlan = new ExplainPlan(queryId, queryType, queryPlan, mode, explainListener); - explainPlan.setHighlightConfig(highlightConfig); + explainPlan.setExtraSearchSource(extraSearchSource); explainPlan.execute(); - assertNull(CalcitePlanContext.getHighlightConfig()); + assertNull(CalcitePlanContext.getExtraSearchSource()); } } diff --git a/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java b/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java index 980a6ef9cce..edbf95996e5 100644 --- a/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java @@ -16,7 +16,6 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -import java.util.Map; import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.lang3.NotImplementedException; import org.junit.jupiter.api.DisplayNameGeneration; @@ -134,33 +133,33 @@ public void onFailure(Exception e) { } @Test - public void execute_sets_highlight_threadlocal() { - Map highlightConfig = Map.of("fields", Map.of("*", Map.of())); - AtomicReference> captured = new AtomicReference<>(); + public void execute_sets_extra_search_source_threadlocal() { + String extraSearchSource = "{\"highlight\":{\"fields\":{\"*\":{}}}}"; + AtomicReference captured = new AtomicReference<>(); doAnswer( invocation -> { - captured.set(CalcitePlanContext.getHighlightConfig()); + captured.set(CalcitePlanContext.getExtraSearchSource()); return null; }) .when(queryService) .execute(any(), any(), any()); QueryPlan query = new QueryPlan(queryId, queryType, plan, queryService, queryListener); - query.setHighlightConfig(highlightConfig); + query.setExtraSearchSource(extraSearchSource); query.execute(); - assertEquals(highlightConfig, captured.get()); + assertEquals(extraSearchSource, captured.get()); } @Test - public void execute_clears_highlight_threadlocal_after_execution() { - Map highlightConfig = Map.of("fields", Map.of("*", Map.of())); + public void execute_clears_extra_search_source_threadlocal_after_execution() { + String extraSearchSource = "{\"highlight\":{\"fields\":{\"*\":{}}}}"; QueryPlan query = new QueryPlan(queryId, queryType, plan, queryService, queryListener); - query.setHighlightConfig(highlightConfig); + query.setExtraSearchSource(extraSearchSource); query.execute(); - assertNull(CalcitePlanContext.getHighlightConfig()); + assertNull(CalcitePlanContext.getExtraSearchSource()); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index e1fedb8bbc9..9028e80b18b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -5,7 +5,9 @@ package org.opensearch.sql.opensearch.storage.scan; +import static java.util.Collections.emptyList; import static java.util.Objects.requireNonNull; +import static org.opensearch.core.xcontent.DeprecationHandler.IGNORE_DEPRECATIONS; import static org.opensearch.sql.common.setting.Settings.Key.CALCITE_PUSHDOWN_ROWCOUNT_ESTIMATION_FACTOR; import static org.opensearch.sql.opensearch.request.PredicateAnalyzer.ScriptQueryExpression.getScriptSortType; import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.MISSING_MAX; @@ -45,7 +47,12 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.checkerframework.checker.nullness.qual.Nullable; -import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.search.SearchModule; +import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.sort.ScoreSortBuilder; import org.opensearch.search.sort.ScriptSortBuilder.ScriptSortType; import org.opensearch.search.sort.SortBuilder; @@ -75,6 +82,9 @@ @Getter public abstract class AbstractCalciteIndexScan extends TableScan implements AliasFieldsWrappable { private static final Logger LOG = LogManager.getLogger(AbstractCalciteIndexScan.class); + + private static final NamedXContentRegistry X_CONTENT_REGISTRY = + new NamedXContentRegistry(new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents()); public final OpenSearchIndex osIndex; // The schema of this scan operator, it's initialized with the row type of the table, but may be // changed by push down operations. @@ -110,7 +120,7 @@ public RelWriter explainTerms(RelWriter pw) { String explainString = String.valueOf(pushDownContext); if (pw instanceof RelWriterImpl) { OpenSearchRequestBuilder requestBuilder = pushDownContext.createRequestBuilder(); - applyHighlightConfig(requestBuilder); + applyExtraSearchSource(requestBuilder); explainString += ", " + requestBuilder; } return super.explainTerms(pw) @@ -118,44 +128,28 @@ public RelWriter explainTerms(RelWriter pw) { } /** - * Apply highlight configuration from the ThreadLocal to the OpenSearch request builder. The - * highlight config is set on a ThreadLocal by the plan's execute() method (on the worker thread) - * and forwarded as-is to OpenSearch. + * Apply extra search-source JSON from the ThreadLocal to the OpenSearch request builder. The JSON + * is parsed via {@code SearchSourceBuilder.fromXContent()} and any recognized clauses (highlight, + * suggest, rescore, etc.) are selectively merged into the target request. * - * @param requestBuilder the OpenSearch request builder to attach the highlight clause to + * @param requestBuilder the OpenSearch request builder to merge extra clauses into */ - @SuppressWarnings("unchecked") - protected static void applyHighlightConfig(OpenSearchRequestBuilder requestBuilder) { - Map config = CalcitePlanContext.getHighlightConfig(); - if (config == null) { + protected static void applyExtraSearchSource(OpenSearchRequestBuilder requestBuilder) { + String json = CalcitePlanContext.getExtraSearchSource(); + if (json == null) { return; } - HighlightBuilder highlightBuilder = new HighlightBuilder(); - Object fieldsObj = config.get("fields"); - if (fieldsObj instanceof Map) { - Map fields = (Map) fieldsObj; - for (String fieldName : fields.keySet()) { - highlightBuilder.field(new HighlightBuilder.Field(fieldName)); - } - } - Object preTagsObj = config.get("pre_tags"); - if (preTagsObj instanceof List) { - List preTags = (List) preTagsObj; - highlightBuilder.preTags(preTags.toArray(new String[0])); - } - Object postTagsObj = config.get("post_tags"); - if (postTagsObj instanceof List) { - List postTags = (List) postTagsObj; - highlightBuilder.postTags(postTags.toArray(new String[0])); - } - Object fragmentSizeObj = config.get("fragment_size"); - if (fragmentSizeObj instanceof Number) { - int fragmentSize = ((Number) fragmentSizeObj).intValue(); - for (HighlightBuilder.Field field : highlightBuilder.fields()) { - field.fragmentSize(fragmentSize); + try { + XContentParser parser = + XContentType.JSON.xContent().createParser(X_CONTENT_REGISTRY, IGNORE_DEPRECATIONS, json); + SearchSourceBuilder extra = SearchSourceBuilder.fromXContent(parser); + SearchSourceBuilder target = requestBuilder.getSourceBuilder(); + if (extra.highlighter() != null) { + target.highlighter(extra.highlighter()); } + } catch (Exception e) { + LOG.warn("Failed to parse extra search source JSON, skipping: {}", e.getMessage()); } - requestBuilder.getSourceBuilder().highlighter(highlightBuilder); } protected Integer getQuerySizeLimit() { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index 0c1090cda8f..0949c458084 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -119,7 +119,7 @@ public Result implement(EnumerableRelImplementor implementor, Prefer pref) { @Override public Enumerator enumerator() { OpenSearchRequestBuilder requestBuilder = pushDownContext.createRequestBuilder(); - applyHighlightConfig(requestBuilder); + applyExtraSearchSource(requestBuilder); return new OpenSearchIndexEnumerator( osIndex.getClient(), getRowType().getFieldNames(), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 76de07a9093..7c0d1d629a1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -93,7 +93,7 @@ public CalciteLogicalIndexScan( */ private static RelDataType buildInitialSchema(RelOptCluster cluster, RelOptTable table) { RelDataType base = table.getRowType(); - if (CalcitePlanContext.getHighlightConfig() == null) { + if (!CalcitePlanContext.hasHighlightInExtraSearchSource()) { return base; } RelDataTypeFactory.Builder builder = cluster.getTypeFactory().builder(); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanHighlightTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanExtraSearchSourceTest.java similarity index 54% rename from opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanHighlightTest.java rename to opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanExtraSearchSourceTest.java index 61b2a292e5c..ad0b5e4171d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanHighlightTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScanExtraSearchSourceTest.java @@ -8,10 +8,9 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.mockito.Mockito.when; -import java.util.List; -import java.util.Map; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -23,29 +22,29 @@ import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; @ExtendWith(MockitoExtension.class) -class AbstractCalciteIndexScanHighlightTest { +class AbstractCalciteIndexScanExtraSearchSourceTest { @Mock private OpenSearchRequestBuilder requestBuilder; @AfterEach void cleanup() { - CalcitePlanContext.clearHighlightConfig(); + CalcitePlanContext.clearExtraSearchSource(); } @Test - void applyHighlightConfig_withNullConfig_doesNothing() { + void applyExtraSearchSource_withNullConfig_doesNothing() { // No ThreadLocal set — config is null, requestBuilder should not be called - AbstractCalciteIndexScan.applyHighlightConfig(requestBuilder); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); // If config is null, method returns early — no interaction with requestBuilder } @Test - void applyHighlightConfig_withWildcardFields_setsHighlighter() { + void applyExtraSearchSource_withWildcardFields_setsHighlighter() { SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); - CalcitePlanContext.setHighlightConfig(Map.of("fields", Map.of("*", Map.of()))); - AbstractCalciteIndexScan.applyHighlightConfig(requestBuilder); + CalcitePlanContext.setExtraSearchSource("{\"highlight\":{\"fields\":{\"*\":{}}}}"); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); HighlightBuilder highlighter = sourceBuilder.highlighter(); assertNotNull(highlighter); @@ -54,16 +53,13 @@ void applyHighlightConfig_withWildcardFields_setsHighlighter() { } @Test - void applyHighlightConfig_withCustomTags_setsTags() { + void applyExtraSearchSource_withCustomTags_setsTags() { SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); - CalcitePlanContext.setHighlightConfig( - Map.of( - "fields", Map.of("message", Map.of()), - "pre_tags", List.of(""), - "post_tags", List.of(""))); - AbstractCalciteIndexScan.applyHighlightConfig(requestBuilder); + CalcitePlanContext.setExtraSearchSource( + "{\"highlight\":{\"fields\":{\"message\":{}},\"pre_tags\":[\"\"],\"post_tags\":[\"\"]}}"); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); HighlightBuilder highlighter = sourceBuilder.highlighter(); assertArrayEquals(new String[] {""}, highlighter.preTags()); @@ -71,31 +67,34 @@ void applyHighlightConfig_withCustomTags_setsTags() { } @Test - void applyHighlightConfig_withFragmentSize_setsPerField() { + void applyExtraSearchSource_withFragmentSize_setsOnHighlighter() { SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); - CalcitePlanContext.setHighlightConfig( - Map.of("fields", Map.of("*", Map.of()), "fragment_size", 2147483647)); - AbstractCalciteIndexScan.applyHighlightConfig(requestBuilder); + CalcitePlanContext.setExtraSearchSource( + "{\"highlight\":{\"fields\":{\"*\":{}},\"fragment_size\":2147483647}}"); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); HighlightBuilder highlighter = sourceBuilder.highlighter(); assertEquals(1, highlighter.fields().size()); - assertEquals(Integer.valueOf(2147483647), highlighter.fields().get(0).fragmentSize()); + // fragment_size is parsed as a top-level HighlightBuilder setting by native XContent parsing + assertEquals(Integer.valueOf(2147483647), highlighter.fragmentSize()); } @Test - void applyHighlightConfig_withMalformedConfig_handlesGracefully() { - SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); - when(requestBuilder.getSourceBuilder()).thenReturn(sourceBuilder); - - // "fields" is a String instead of Map — should not throw NPE - CalcitePlanContext.setHighlightConfig(Map.of("fields", "not_a_map")); - AbstractCalciteIndexScan.applyHighlightConfig(requestBuilder); + void applyExtraSearchSource_withMalformedJson_handlesGracefully() { + // Malformed JSON — should not throw, just log warning and skip + CalcitePlanContext.setExtraSearchSource("{not valid json}}}"); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); + // No exception thrown — method handles gracefully + } - // Should still create a highlighter (just with no fields) - HighlightBuilder highlighter = sourceBuilder.highlighter(); - assertNotNull(highlighter); - assertEquals(0, highlighter.fields().size()); + @Test + void applyExtraSearchSource_withNoHighlight_doesNotSetHighlighter() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + // JSON with no highlight clause — nothing to merge + CalcitePlanContext.setExtraSearchSource("{\"size\":10}"); + AbstractCalciteIndexScan.applyExtraSearchSource(requestBuilder); + assertNull(sourceBuilder.highlighter()); } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java index 750137e6f07..455eebc97a5 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java @@ -10,7 +10,6 @@ import lombok.extern.log4j.Log4j2; import org.antlr.v4.runtime.tree.ParseTree; -import org.json.JSONObject; import org.opensearch.sql.ast.statement.Statement; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.common.setting.Settings; @@ -66,7 +65,7 @@ public void execute( ResponseListener explainListener) { try { AbstractPlan plan = plan(request, queryListener, explainListener); - setHighlightOnPlan(plan, request); + setExtraSearchSourceOnPlan(plan, request); queryManager.submit(plan); } catch (Exception e) { queryListener.onFailure(e); @@ -83,7 +82,7 @@ public void execute( public void explain(PPLQueryRequest request, ResponseListener listener) { try { AbstractPlan plan = plan(request, NO_CONSUMER_RESPONSE_LISTENER, listener); - setHighlightOnPlan(plan, request); + setExtraSearchSourceOnPlan(plan, request); queryManager.submit(plan); } catch (Exception e) { listener.onFailure(e); @@ -91,13 +90,13 @@ public void explain(PPLQueryRequest request, ResponseListener l } /** - * Set highlight configuration on the plan so it can be carried across the thread boundary. The - * plan's execute() method will set the ThreadLocal on the worker thread. + * Set extra search source on the plan so it can be carried across the thread boundary. The plan's + * execute() method will set the ThreadLocal on the worker thread. */ - private void setHighlightOnPlan(AbstractPlan plan, PPLQueryRequest request) { - JSONObject highlight = request.getHighlight(); - if (highlight != null) { - plan.setHighlightConfig(highlight.toMap()); + private void setExtraSearchSourceOnPlan(AbstractPlan plan, PPLQueryRequest request) { + String extra = request.getExtraSearchSource(); + if (extra != null) { + plan.setExtraSearchSource(extra); } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java index d144b5f5d0c..c236474bc5c 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java @@ -111,16 +111,21 @@ public int getFetchSize() { } /** - * Get the highlight configuration from the request body. The caller (OSD, API, CLI) controls - * highlighting by providing a highlight object in the PPL request. The backend forwards this - * config as-is to OpenSearch. + * Get extra search-source-compatible JSON from the request body. Currently wraps the {@code + * highlight} field; future extensions (suggest, rescore, post_filter, etc.) can be added here. * - * @return highlight JSONObject from request, or null if not specified + * @return search-source JSON string, or null if no extra fields are present */ - public JSONObject getHighlight() { + public String getExtraSearchSource() { if (jsonContent == null) { return null; } - return jsonContent.optJSONObject("highlight"); + JSONObject highlight = jsonContent.optJSONObject("highlight"); + if (highlight == null) { + return null; + } + JSONObject wrapper = new JSONObject(); + wrapper.put("highlight", highlight); + return wrapper.toString(); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java index 8e4731c0c3c..c0ca166641d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java @@ -96,7 +96,7 @@ public void testGetFetchSizeWithLargeValue() { } @Test - public void testGetHighlightReturnsHighlightObject() { + public void testGetExtraSearchSourceReturnsHighlightWrapper() { JSONObject json = new JSONObject(); json.put("query", "source=t \"error\""); json.put( @@ -104,23 +104,25 @@ public void testGetHighlightReturnsHighlightObject() { new JSONObject( "{\"fields\": {\"*\": {}}, \"pre_tags\": [\"\"], \"post_tags\": [\"\"]}")); PPLQueryRequest request = new PPLQueryRequest("source=t \"error\"", json, "/_plugins/_ppl"); - JSONObject highlight = request.getHighlight(); - assertNotNull(highlight); - assertTrue(highlight.has("fields")); - assertTrue(highlight.has("pre_tags")); - assertTrue(highlight.has("post_tags")); + String extra = request.getExtraSearchSource(); + assertNotNull(extra); + JSONObject parsed = new JSONObject(extra); + assertTrue(parsed.has("highlight")); + assertTrue(parsed.getJSONObject("highlight").has("fields")); + assertTrue(parsed.getJSONObject("highlight").has("pre_tags")); + assertTrue(parsed.getJSONObject("highlight").has("post_tags")); } @Test - public void testGetHighlightReturnsNullWhenNotSpecified() { + public void testGetExtraSearchSourceReturnsNullWhenNotSpecified() { JSONObject json = new JSONObject("{\"query\": \"source=t\"}"); PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); - assertNull(request.getHighlight()); + assertNull(request.getExtraSearchSource()); } @Test - public void testGetHighlightReturnsNullWhenJsonContentIsNull() { + public void testGetExtraSearchSourceReturnsNullWhenJsonContentIsNull() { PPLQueryRequest request = new PPLQueryRequest("source=t", null, "/_plugins/_ppl"); - assertNull(request.getHighlight()); + assertNull(request.getExtraSearchSource()); } } From b290e062f18b4dad583b66967bf21f384bb398b5 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Wed, 25 Feb 2026 23:44:35 -0800 Subject: [PATCH 18/18] chen - fix doc Signed-off-by: Jialiang Liang --- docs/user/ppl/interfaces/endpoint.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index f940f3d2da9..dc64568bcc1 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -59,14 +59,7 @@ Expected output: You can include an optional `highlight` object in the request body to request search result highlighting. When a PPL query contains a full-text search (e.g., `search source=logs "error"`), OpenSearch identifies where the search terms appear in document fields and returns the matching fragments wrapped in the specified tags. -The `highlight` object supports the following parameters: - -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `fields` | Object | Yes | Map of field names to per-field configuration. Use `"*"` to highlight all `text` and `keyword` fields that match. | -| `pre_tags` | Array of strings | No | Tags inserted before each highlighted term. Default: `[""]`. | -| `post_tags` | Array of strings | No | Tags inserted after each highlighted term. Default: `[""]`. | -| `fragment_size` | Integer | No | Maximum character length of each highlight fragment. Default is OpenSearch's default (100). Set to `2147483647` to return the entire field value without truncation. | +The `highlight` object is passed as-is to OpenSearch. See the [OpenSearch highlighting documentation](https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/) for all supported parameters. ### What gets highlighted @@ -202,7 +195,7 @@ Only the `address` field is highlighted. The `` custom tags are used inste ### Limitations - Highlighting is supported only in the Calcite engine. -- The backend forwards the highlight config as-is to OpenSearch. The same highlighting behavior and limitations as [OpenSearch's highlighting API](https://opensearch.org/docs/latest/search-plugins/searching-data/highlight/) apply. +- The backend forwards the highlight config as-is to OpenSearch. The same highlighting behavior and limitations as [OpenSearch's highlighting API](https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/) apply. - Highlighting works with **single-source queries only**, consistent with DSL where highlighting is inherently single-index per request. Behavior with joins (`| join`), subqueries, and multi-source queries is not validated. ## Explain