From ff1fd3219ef6bd3644ed684cc318a564b0b003f9 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 26 Feb 2026 17:08:02 -0800 Subject: [PATCH 01/10] [Bug Fix] Explicit head command should override fetch_size in PPL When a user's PPL query contains an explicit head command, the fetch_size parameter should not inject an additional Head node on top. Previously, fetch_size always wrapped the plan with Head(fetchSize), creating a double-Head that could cap results unexpectedly (e.g., head 100 with fetch_size=5 would only return 5 rows). This adds a containsHead() check in AstStatementBuilder to skip the Head injection when the user's query already has a head command, letting the user's explicit intent take precedence. Signed-off-by: Kai Huang --- .../org/opensearch/sql/ppl/FetchSizeIT.java | 55 +++++++++++++++++-- .../sql/ppl/parser/AstStatementBuilder.java | 19 ++++++- .../ppl/parser/AstStatementBuilderTest.java | 14 ++--- 3 files changed, 73 insertions(+), 15 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FetchSizeIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FetchSizeIT.java index abd75536d5..264a298ebb 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/FetchSizeIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FetchSizeIT.java @@ -172,9 +172,9 @@ public void testFetchSizeWithStats() throws IOException { } @Test - public void testFetchSizeWithHead() throws IOException { - // Both head command and fetch_size - the smaller limit should win - // head 3 limits to 3, fetch_size 10 would allow 10, so we get 3 + public void testHeadOverridesFetchSizeWhenSmaller() throws IOException { + // Explicit head takes precedence over fetch_size + // head 3 returns 3 rows regardless of fetch_size=10 JSONObject result = executeQueryWithFetchSize( String.format("source=%s | head 3 | fields firstname", TEST_INDEX_ACCOUNT), 10); @@ -183,16 +183,59 @@ public void testFetchSizeWithHead() throws IOException { } @Test - public void testFetchSizeSmallerThanHead() throws IOException { - // fetch_size smaller than head - fetch_size should further limit - // head 100 would return 100, but fetch_size 5 limits to 5 + public void testHeadOverridesFetchSizeWhenLarger() throws IOException { + // Explicit head takes precedence over fetch_size + // head 100 should return 100 rows even though fetch_size=5 JSONObject result = executeQueryWithFetchSize( String.format("source=%s | head 100 | fields firstname", TEST_INDEX_ACCOUNT), 5); JSONArray dataRows = result.getJSONArray("datarows"); + assertEquals(100, dataRows.length()); + } + + @Test + public void testHeadOverridesFetchSizeWithOffset() throws IOException { + // Explicit head with offset takes precedence over fetch_size + // head 3 from 2 should skip 2 rows and return 3 rows, ignoring fetch_size=100 + JSONObject result = + executeQueryWithFetchSize( + String.format("source=%s | head 3 from 2 | fields firstname", TEST_INDEX_ACCOUNT), 100); + JSONArray dataRows = result.getJSONArray("datarows"); + assertEquals(3, dataRows.length()); + } + + @Test + public void testHeadOverridesFetchSizeWithFilter() throws IOException { + // Explicit head after filter takes precedence over fetch_size + // Even with fetch_size=2, head 5 should return 5 matching rows + JSONObject result = + executeQueryWithFetchSize( + String.format( + "source=%s | where age > 30 | head 5 | fields firstname, age", TEST_INDEX_ACCOUNT), + 2); + JSONArray dataRows = result.getJSONArray("datarows"); assertEquals(5, dataRows.length()); } + @Test + public void testHeadEqualToFetchSize() throws IOException { + // When head and fetch_size are the same value, head takes precedence (no double-Head) + JSONObject result = + executeQueryWithFetchSize( + String.format("source=%s | head 7 | fields firstname", TEST_INDEX_ACCOUNT), 7); + JSONArray dataRows = result.getJSONArray("datarows"); + assertEquals(7, dataRows.length()); + } + + @Test + public void testHeadLargerThanDatasetWithFetchSize() throws IOException { + // head 1000 on a 7-row index with fetch_size=3: head takes precedence, returns all 7 rows + JSONObject result = + executeQueryWithFetchSize(String.format("source=%s | head 1000", TEST_INDEX_BANK), 3); + JSONArray dataRows = result.getJSONArray("datarows"); + assertEquals(7, dataRows.length()); + } + @Test public void testFetchSizeAsUrlParameter() throws IOException { // fetch_size specified as URL parameter instead of JSON body diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java index cee084bc71..d59a08b69f 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java @@ -32,7 +32,7 @@ public class AstStatementBuilder extends OpenSearchPPLParserBaseVisitor 0) { + if (context.getFetchSize() > 0 && !containsHead(rawPlan)) { rawPlan = new Head(context.getFetchSize(), 0).attach(rawPlan); } UnresolvedPlan plan = addSelectAll(rawPlan); @@ -69,6 +69,23 @@ public static class StatementBuilderContext { private final String explainMode; } + /** + * Recursively checks if the AST contains a {@link Head} node. When the user's query already + * includes an explicit {@code head} command, we should not inject an additional Head for + * fetch_size so that the user's explicit limit takes precedence. + */ + private boolean containsHead(UnresolvedPlan plan) { + if (plan instanceof Head) { + return true; + } + for (var child : plan.getChild()) { + if (child instanceof UnresolvedPlan && containsHead((UnresolvedPlan) child)) { + return true; + } + } + return false; + } + private UnresolvedPlan addSelectAll(UnresolvedPlan plan) { if ((plan instanceof Project) && !((Project) plan).isExcluded()) { return plan; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java index 4229bbc8af..e31f296e90 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java @@ -87,33 +87,31 @@ public void buildQueryStatementWithLargeFetchSize() { @Test public void buildQueryStatementWithFetchSizeAndSmallerHead() { // User query has head 3, fetchSize=10 - // Head(10) wraps Head(3), then Project(*) wraps on top - // The inner head 3 limits first, so only 3 rows are returned + // Explicit head takes precedence over fetch_size, so no outer Head(10) is injected assertEqualWithFetchSize( "source=t | head 3", 10, - new Query(project(head(head(relation("t"), 3, 0), 10, 0), AllFields.of()), 0, PPL)); + new Query(project(head(relation("t"), 3, 0), AllFields.of()), 0, PPL)); } @Test public void buildQueryStatementWithFetchSizeSmallerThanHead() { // User query has head 100, fetchSize=5 - // Head(5) wraps Head(100), then Project(*) wraps on top - // The outer head 5 limits, so only 5 rows are returned + // Explicit head takes precedence over fetch_size, so no outer Head(5) is injected assertEqualWithFetchSize( "source=t | head 100", 5, - new Query(project(head(head(relation("t"), 100, 0), 5, 0), AllFields.of()), 0, PPL)); + new Query(project(head(relation("t"), 100, 0), AllFields.of()), 0, PPL)); } @Test public void buildQueryStatementWithFetchSizeAndHeadWithOffset() { // User query has head 3 from 1 (with offset), fetchSize=10 - // The inner head offset is preserved, outer Head always has offset 0 + // Explicit head takes precedence over fetch_size, so no outer Head(10) is injected assertEqualWithFetchSize( "source=t | head 3 from 1", 10, - new Query(project(head(head(relation("t"), 3, 1), 10, 0), AllFields.of()), 0, PPL)); + new Query(project(head(relation("t"), 3, 1), AllFields.of()), 0, PPL)); } private void assertEqualWithFetchSize(String query, int fetchSize, Statement expectedStatement) { From 7f797a94b72feaaa05f5d15018cac7dcd4d29dc8 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 26 Feb 2026 19:41:02 -0800 Subject: [PATCH 02/10] Update explain plan expectations for head-overrides-fetch_size fix Remove the outer LogicalSort(fetch=[fetchSize]) from expected explain plans when an explicit head command is present, since fetch_size no longer injects a Head node in that case. Signed-off-by: Kai Huang --- .../sql/calcite/remote/CalciteExplainIT.java | 8 ++++---- .../explain_fetch_size_smaller_than_head_push.yaml | 9 ++++----- .../calcite/explain_fetch_size_with_head_push.yaml | 9 ++++----- .../explain_fetch_size_smaller_than_head_push.yaml | 12 +++++------- .../explain_fetch_size_with_head_push.yaml | 14 ++++++-------- 5 files changed, 23 insertions(+), 29 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 8e980d8973..09866251bd 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2579,8 +2579,8 @@ public void testExplainFetchSizePushDown() throws IOException { @Test public void testExplainFetchSizeWithSmallerHead() throws IOException { // fetch_size=10 with user's | head 3 - // Two LogicalSort nodes: inner fetch=[3] from user head, outer fetch=[10] from fetch_size - // Effective limit = min(3, 10) = 3 + // Explicit head takes precedence: only one LogicalSort(fetch=[3]) from user head + // fetch_size does not inject an additional Head when user already has one String expected = loadExpectedPlan("explain_fetch_size_with_head_push.yaml"); assertYamlEqualsIgnoreId( expected, @@ -2591,8 +2591,8 @@ public void testExplainFetchSizeWithSmallerHead() throws IOException { @Test public void testExplainFetchSizeSmallerThanHead() throws IOException { // fetch_size=5 with user's | head 100 - // Two LogicalSort nodes: inner fetch=[100] from user head, outer fetch=[5] from fetch_size - // Effective limit = min(100, 5) = 5 + // Explicit head takes precedence: only one LogicalSort(fetch=[100]) from user head + // fetch_size does not inject an additional Head when user already has one String expected = loadExpectedPlan("explain_fetch_size_smaller_than_head_push.yaml"); assertYamlEqualsIgnoreId( expected, diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_smaller_than_head_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_smaller_than_head_push.yaml index 7c80ddf56d..c2fd1c0711 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_smaller_than_head_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_smaller_than_head_push.yaml @@ -1,9 +1,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalSort(fetch=[5]) - LogicalProject(age=[$8]) - LogicalSort(fetch=[100]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(age=[$8]) + LogicalSort(fetch=[100]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->100, LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->100, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":100,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=100, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_with_head_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_with_head_push.yaml index ba828e445b..f900306a29 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_with_head_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_with_head_push.yaml @@ -1,9 +1,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalSort(fetch=[10]) - LogicalProject(age=[$8]) - LogicalSort(fetch=[3]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(age=[$8]) + LogicalSort(fetch=[3]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->3, LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":3,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=3, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->3, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":3,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=3, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_fetch_size_smaller_than_head_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_fetch_size_smaller_than_head_push.yaml index a3099df5ff..018a232c57 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_fetch_size_smaller_than_head_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_fetch_size_smaller_than_head_push.yaml @@ -1,13 +1,11 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalSort(fetch=[5]) - LogicalProject(age=[$8]) - LogicalSort(fetch=[100]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(age=[$8]) + LogicalSort(fetch=[100]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..16=[{inputs}], age=[$t8]) - EnumerableLimit(fetch=[5]) - EnumerableLimit(fetch=[100]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableLimit(fetch=[100]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_fetch_size_with_head_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_fetch_size_with_head_push.yaml index 6fb5ef8a97..a3720c7021 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_fetch_size_with_head_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_fetch_size_with_head_push.yaml @@ -1,13 +1,11 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalSort(fetch=[10]) - LogicalProject(age=[$8]) - LogicalSort(fetch=[3]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(age=[$8]) + LogicalSort(fetch=[3]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableLimit(fetch=[10]) - EnumerableCalc(expr#0..16=[{inputs}], age=[$t8]) - EnumerableLimit(fetch=[3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..16=[{inputs}], age=[$t8]) + EnumerableLimit(fetch=[3]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) From adea0875531cf498afd31b82bdb6b5b1f0bb7ce3 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 27 Feb 2026 10:57:19 -0800 Subject: [PATCH 03/10] Add test case for multiple head commands with fetch_size Verifies that when a query has multiple heads (e.g., head 3 | head 500), fetch_size does not inject yet another Head node. The existing Head nodes are preserved as-is, with the inner head 3 limiting first. Signed-off-by: Kai Huang --- .../sql/ppl/parser/AstStatementBuilderTest.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java index e31f296e90..07116f1852 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java @@ -114,6 +114,18 @@ public void buildQueryStatementWithFetchSizeAndHeadWithOffset() { new Query(project(head(relation("t"), 3, 1), AllFields.of()), 0, PPL)); } + @Test + public void buildQueryStatementWithFetchSizeAndMultipleHeads() { + // User query has head 3 | head 500, fetchSize=10 + // containsHead() finds the existing Head nodes, so no Head(10) is injected + // Effective limit is min(3, 500) = 3 since inner head 3 limits first + assertEqualWithFetchSize( + "source=t | head 3 | head 500", + 10, + new Query( + project(head(head(relation("t"), 3, 0), 500, 0), AllFields.of()), 0, PPL)); + } + private void assertEqualWithFetchSize(String query, int fetchSize, Statement expectedStatement) { Node actualPlan = planWithFetchSize(query, fetchSize); assertEquals(expectedStatement, actualPlan); From e62de530f3b5980fb55659ef36bcab20e6cc0729 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 27 Feb 2026 11:04:16 -0800 Subject: [PATCH 04/10] spotless Signed-off-by: Kai Huang --- .../org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java index 07116f1852..e44066c973 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java @@ -122,8 +122,7 @@ public void buildQueryStatementWithFetchSizeAndMultipleHeads() { assertEqualWithFetchSize( "source=t | head 3 | head 500", 10, - new Query( - project(head(head(relation("t"), 3, 0), 500, 0), AllFields.of()), 0, PPL)); + new Query(project(head(head(relation("t"), 3, 0), 500, 0), AllFields.of()), 0, PPL)); } private void assertEqualWithFetchSize(String query, int fetchSize, Statement expectedStatement) { From a6e1216dba6e05e7561ff40bc9f1c135c5c36c6c Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 27 Feb 2026 13:53:46 -0800 Subject: [PATCH 05/10] Add documentation for fetch_size and head command interaction Signed-off-by: ahkcs Signed-off-by: Kai Huang --- docs/user/interfaces/endpoint.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/user/interfaces/endpoint.rst b/docs/user/interfaces/endpoint.rst index 26dca94d22..4aac3eaba5 100644 --- a/docs/user/interfaces/endpoint.rst +++ b/docs/user/interfaces/endpoint.rst @@ -294,6 +294,27 @@ Unlike SQL's ``fetch_size`` which enables cursor-based pagination, PPL's ``fetch | Can fetch more? | Yes (with cursor) | No (single response) | +--------------------+-------------------------------------+------------------------------------+ +Interaction with the ``head`` command +-------------------------------------- + +When a PPL query contains an explicit ``head`` command, the ``head`` command takes precedence over ``fetch_size``. Because PPL's ``fetch_size`` does not support pagination, capping the result below the user's explicit ``head`` limit would silently discard rows with no way to retrieve them. To avoid this, ``fetch_size`` is ignored when a ``head`` command is present, and the query returns the number of rows specified by ``head``. + +If the query does **not** contain a ``head`` command, ``fetch_size`` limits the result as usual. + +Examples:: + + # head 100 takes precedence — returns 100 rows, fetch_size=5 is ignored + >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ + "fetch_size" : 5, + "query" : "source = accounts | head 100 | fields firstname" + }' + + # No head command — fetch_size=5 limits the result to 5 rows + >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ + "fetch_size" : 5, + "query" : "source = accounts | fields firstname" + }' + Example 1: JSON body ------- From 5841f080a4ec1cc54d975fd02c3c4089ea0171d5 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 2 Mar 2026 12:07:41 -0800 Subject: [PATCH 06/10] Refine containsHead to only check main pipeline, not subqueries Change from recursive all-children traversal to iterative first-child chain walk. This ensures head commands inside join subqueries or nested structures do not incorrectly suppress fetch_size injection on the outer query. Signed-off-by: ahkcs Signed-off-by: Kai Huang --- .../sql/ppl/parser/AstStatementBuilder.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java index d59a08b69f..15b1b88a58 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java @@ -8,9 +8,11 @@ import static org.opensearch.sql.executor.QueryType.PPL; import com.google.common.collect.ImmutableList; +import java.util.List; import lombok.Builder; import lombok.Data; import lombok.RequiredArgsConstructor; +import org.opensearch.sql.ast.Node; import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.statement.Explain; import org.opensearch.sql.ast.statement.Query; @@ -70,18 +72,22 @@ public static class StatementBuilderContext { } /** - * Recursively checks if the AST contains a {@link Head} node. When the user's query already - * includes an explicit {@code head} command, we should not inject an additional Head for - * fetch_size so that the user's explicit limit takes precedence. + * Checks if the main pipeline contains a {@link Head} node by walking the first-child chain. Only + * the main pipeline is checked — subqueries in joins or nested structures are not traversed. When + * the user's query already includes an explicit {@code head} command, we should not inject an + * additional Head for fetch_size so that the user's explicit limit takes precedence. */ private boolean containsHead(UnresolvedPlan plan) { - if (plan instanceof Head) { - return true; - } - for (var child : plan.getChild()) { - if (child instanceof UnresolvedPlan && containsHead((UnresolvedPlan) child)) { + UnresolvedPlan current = plan; + while (current != null) { + if (current instanceof Head) { return true; } + List children = current.getChild(); + if (children.isEmpty() || !(children.get(0) instanceof UnresolvedPlan)) { + break; + } + current = (UnresolvedPlan) children.get(0); } return false; } From 4cacb798b8d3cd16b9d7146d2f6a1c43314aea31 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 2 Mar 2026 12:14:19 -0800 Subject: [PATCH 07/10] Clarify fetch_size as default limit that yields to explicit head command Explain that since PPL's fetch_size has no pagination support, it acts as a default response size rather than an absolute cap. When the user specifies an explicit head command, fetch_size yields to avoid silently discarding rows with no way to retrieve them. Also document that head commands inside subqueries do not affect fetch_size for the outer query. Signed-off-by: ahkcs Signed-off-by: Kai Huang --- docs/user/interfaces/endpoint.rst | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/user/interfaces/endpoint.rst b/docs/user/interfaces/endpoint.rst index 4aac3eaba5..b7f1058938 100644 --- a/docs/user/interfaces/endpoint.rst +++ b/docs/user/interfaces/endpoint.rst @@ -273,7 +273,7 @@ Fetch Size (PPL) [Experimental] Description ----------- -The ``fetch_size`` parameter limits the number of rows returned in a PPL query response. The value of ``fetch_size`` should be greater than ``0``. In absence of ``fetch_size`` or a value of ``0``, the result size is governed by the ``plugins.query.size_limit`` cluster setting. +The ``fetch_size`` parameter sets a **default** limit on the number of rows returned in a PPL query response. The value of ``fetch_size`` should be greater than ``0``. In absence of ``fetch_size`` or a value of ``0``, the result size is governed by the ``plugins.query.size_limit`` cluster setting. ``fetch_size`` can be specified either as a URL parameter or in the JSON request body. If both are provided, the JSON body value takes precedence. @@ -282,7 +282,7 @@ If ``fetch_size`` is larger than ``plugins.query.size_limit``, the result is cap Note ---- -Unlike SQL's ``fetch_size`` which enables cursor-based pagination, PPL's ``fetch_size`` does not return a cursor and does not support fetching additional pages. The response is always complete and final. +Unlike SQL's ``fetch_size`` which enables cursor-based pagination, PPL's ``fetch_size`` does not return a cursor and does not support fetching additional pages. The response is always complete and final. Because there is no pagination, ``fetch_size`` acts as a default response size for queries that do not specify their own row limit. If the query includes an explicit ``head`` command, the ``head`` limit takes precedence — otherwise ``fetch_size`` would silently discard rows with no way to retrieve them. +--------------------+-------------------------------------+------------------------------------+ | Aspect | SQL ``fetch_size`` | PPL ``fetch_size`` | @@ -297,9 +297,19 @@ Unlike SQL's ``fetch_size`` which enables cursor-based pagination, PPL's ``fetch Interaction with the ``head`` command -------------------------------------- -When a PPL query contains an explicit ``head`` command, the ``head`` command takes precedence over ``fetch_size``. Because PPL's ``fetch_size`` does not support pagination, capping the result below the user's explicit ``head`` limit would silently discard rows with no way to retrieve them. To avoid this, ``fetch_size`` is ignored when a ``head`` command is present, and the query returns the number of rows specified by ``head``. +When a PPL query contains an explicit ``head`` command in the main pipeline, the ``head`` command takes precedence over ``fetch_size``. Because PPL's ``fetch_size`` does not support pagination, capping the result below the user's explicit ``head`` limit would silently discard rows with no way to retrieve them. To avoid this, ``fetch_size`` is ignored when the main pipeline contains a ``head`` command, and the query returns the number of rows specified by ``head``. -If the query does **not** contain a ``head`` command, ``fetch_size`` limits the result as usual. +If the query does **not** contain a ``head`` command in the main pipeline, ``fetch_size`` limits the result as usual. Note that ``head`` commands inside subqueries (e.g., within a ``join``) do not affect ``fetch_size`` behavior for the outer query, since they operate at a different scope. + ++-----------------------------------------------+---------------------------------------------------+ +| Query | Behavior with ``fetch_size=5`` | ++===============================================+===================================================+ +| ``source=t \| fields age`` | ``fetch_size`` applies — returns 5 rows | ++-----------------------------------------------+---------------------------------------------------+ +| ``source=t \| head 100 \| fields age`` | ``head`` takes precedence — returns 100 rows | ++-----------------------------------------------+---------------------------------------------------+ +| ``source=t1 \| join (source=t2 \| head 100)`` | ``head`` is in subquery — ``fetch_size`` applies | ++-----------------------------------------------+---------------------------------------------------+ Examples:: From 3039353f04dec202ec7b34b309473e420fb6a071 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 2 Mar 2026 13:44:49 -0800 Subject: [PATCH 08/10] Add test case for head followed by fields with fetch_size Verifies that containsHead correctly walks past Project (from fields) to find the Head node underneath. Signed-off-by: ahkcs Signed-off-by: Kai Huang --- .../sql/ppl/parser/AstStatementBuilderTest.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java index e44066c973..719de2ec32 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java @@ -114,6 +114,20 @@ public void buildQueryStatementWithFetchSizeAndHeadWithOffset() { new Query(project(head(relation("t"), 3, 1), AllFields.of()), 0, PPL)); } + @Test + public void buildQueryStatementWithFetchSizeAndHeadFollowedByFields() { + // User query has head 100 | fields age, fetchSize=5 + // head is not the outermost node (Project from fields is), but containsHead walks the + // pipeline chain and finds it, so no Head(5) is injected + assertEqualWithFetchSize( + "source=t | head 100 | fields age", + 5, + new Query( + projectWithArg(head(relation("t"), 100, 0), defaultFieldsArgs(), field("age")), + 0, + PPL)); + } + @Test public void buildQueryStatementWithFetchSizeAndMultipleHeads() { // User query has head 3 | head 500, fetchSize=10 From 713b0958fcaa09079e294857875367917ec79c8c Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 2 Mar 2026 14:47:03 -0800 Subject: [PATCH 09/10] Remove unnecessary backslash escapes in RST table pipe characters Signed-off-by: Kai Huang --- docs/user/interfaces/endpoint.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/user/interfaces/endpoint.rst b/docs/user/interfaces/endpoint.rst index b7f1058938..89098fe418 100644 --- a/docs/user/interfaces/endpoint.rst +++ b/docs/user/interfaces/endpoint.rst @@ -304,11 +304,11 @@ If the query does **not** contain a ``head`` command in the main pipeline, ``fet +-----------------------------------------------+---------------------------------------------------+ | Query | Behavior with ``fetch_size=5`` | +===============================================+===================================================+ -| ``source=t \| fields age`` | ``fetch_size`` applies — returns 5 rows | +| ``source=t | fields age`` | ``fetch_size`` applies — returns 5 rows | +-----------------------------------------------+---------------------------------------------------+ -| ``source=t \| head 100 \| fields age`` | ``head`` takes precedence — returns 100 rows | +| ``source=t | head 100 | fields age`` | ``head`` takes precedence — returns 100 rows | +-----------------------------------------------+---------------------------------------------------+ -| ``source=t1 \| join (source=t2 \| head 100)`` | ``head`` is in subquery — ``fetch_size`` applies | +| ``source=t1 | join (source=t2 | head 100)`` | ``head`` is in subquery — ``fetch_size`` applies | +-----------------------------------------------+---------------------------------------------------+ Examples:: From 8c6d460fa92f3d3b95f78207ca0069efc299086f Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 3 Mar 2026 12:33:05 -0800 Subject: [PATCH 10/10] Add integration test for fetch_size with head in join subquery Validates that head commands inside join subqueries do not suppress fetch_size on the outer query, locking down the scoping guarantee. Signed-off-by: ahkcs Signed-off-by: Kai Huang --- .../sql/calcite/remote/CalcitePPLJoinIT.java | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java index d6d1c72f99..84d7cce8d9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java @@ -5,9 +5,11 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestUtils.getResponseBody; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_HOBBIES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OCCUPATION; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY; +import static org.opensearch.sql.plugin.rest.RestPPLQueryAction.QUERY_API_ENDPOINT; import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -17,9 +19,13 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; +import java.util.Locale; import org.json.JSONObject; +import org.junit.Assert; import org.junit.Test; import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.legacy.TestsConstants; import org.opensearch.sql.ppl.PPLIntegTestCase; @@ -1075,4 +1081,32 @@ public void testComplexSortPushDownForSMJWithMaxOptionAndFieldList() throws IOEx rows("Jake", "USA", "California", 4, 2023, 70, "a"), rows("Hello", "USA", "New York", 4, 2023, 30, "e")); } + + @Test + public void testFetchSizeAppliesWhenHeadOnlyInJoinSubquery() throws IOException { + // head inside a join subquery should NOT suppress fetch_size on the outer query. + // The subquery's head operates at a different scope. + // Self-join STATE_COUNTRY on name, right side limited to head 3. + // fetch_size=2 should still cap the outer result to 2 rows. + String query = + String.format( + Locale.ROOT, + "source=%s | inner join left=a, right=b ON a.name = b.name" + + " [source=%s | sort name | head 3] | sort a.name | fields a.name, a.age", + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_STATE_COUNTRY); + Request request = new Request("POST", QUERY_API_ENDPOINT); + request.setJsonEntity( + String.format(Locale.ROOT, "{\"query\": \"%s\", \"fetch_size\": 2}", query)); + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + Response response = client().performRequest(request); + Assert.assertEquals(200, response.getStatusLine().getStatusCode()); + JSONObject result = new JSONObject(getResponseBody(response, true)); + verifySchema(result, schema("name", "string"), schema("age", "int")); + // Right side after sort+head 3: David, Hello, Jake. Self-join matches all 3. + // Outer sort by name + fetch_size=2 caps to first 2: David, Hello + verifyDataRows(result, rows("David", 40), rows("Hello", 30)); + } }