From 29fa3a0ef31b36855303bb0270c6161edee0075b Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 17 Feb 2026 13:48:16 -0800 Subject: [PATCH 1/3] multisearch fix Signed-off-by: Kai Huang --- .../calcite/utils/OpenSearchTypeFactory.java | 31 +++ .../remote/CalciteMultisearchCommandIT.java | 201 +++++++++++++++++- 2 files changed, 230 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 17d99fb4fbb..f95215592e5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -45,6 +45,7 @@ import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; +import org.checkerframework.checker.nullness.qual.Nullable; import org.opensearch.sql.calcite.type.AbstractExprRelDataType; import org.opensearch.sql.calcite.type.ExprBinaryType; import org.opensearch.sql.calcite.type.ExprDateType; @@ -377,6 +378,36 @@ public static boolean isNumericType(RelDataType fieldType) { return false; } + /** + * Preserves OpenSearch UDT types through set operations (UNION, INTERSECT, EXCEPT). When all + * input types share the same UDT (e.g., all are EXPR_TIMESTAMP), the result retains the UDT + * wrapper instead of being downgraded to the underlying SQL type (e.g., VARCHAR). This is + * critical for operations like multisearch that use UNION ALL, where downstream operators (bin, + * span) rely on the UDT type to determine how to process the field. + */ + @Override + public @Nullable RelDataType leastRestrictive(List types) { + if (types.size() > 1) { + RelDataType first = types.get(0); + if (first instanceof AbstractExprRelDataType firstUdt) { + boolean allSameUdt = + types.stream() + .allMatch( + t -> + t instanceof AbstractExprRelDataType udt + && udt.getUdt() == firstUdt.getUdt()); + if (allSameUdt) { + boolean anyNullable = types.stream().anyMatch(RelDataType::isNullable); + if (anyNullable && !first.isNullable()) { + return firstUdt.createWithNullability(this, true); + } + return first; + } + } + } + return super.leastRestrictive(types); + } + /** * Checks if the RelDataType represents a time-based field (timestamp, date, or time). Supports * both standard SQL time types (including TIMESTAMP, TIMESTAMP_WITH_LOCAL_TIME_ZONE, DATE, TIME, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java index 393b0a4a501..526c38b0325 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java @@ -152,10 +152,10 @@ public void testMultisearchWithTimestampInterleaving() throws IOException { verifySchema( result, - schema("@timestamp", null, "string"), + schema("@timestamp", null, "timestamp"), schema("category", null, "string"), schema("value", null, "int"), - schema("timestamp", null, "string")); + schema("timestamp", null, "timestamp")); verifyDataRows( result, @@ -344,6 +344,203 @@ public void testMultisearchCrossIndexFieldSelection() throws IOException { rows(null, null, "Times Square", 1002)); } + // ======================================================================== + // Reproduction tests for GitHub issues #5145, #5146, #5147 + // ======================================================================== + + /** Reproduce #5145: multisearch without further processing should return all rows. */ + @Test + public void testMultisearchWithoutFurtherProcessing() throws IOException { + JSONObject result = + executeQuery( + "| multisearch [search source=opensearch-sql_test_index_time_data | where category =" + + " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data | where category" + + " = \\\"B\\\"]"); + + verifySchema( + result, + schema("@timestamp", null, "timestamp"), + schema("category", null, "string"), + schema("value", null, "int"), + schema("timestamp", null, "timestamp")); + + // category A has 26 rows, category B has 25 rows = 51 total + assertEquals(51, result.getInt("total")); + } + + /** Reproduce #5146: span expression used after multisearch should work. */ + @Test + public void testMultisearchWithSpanExpression() throws IOException { + JSONObject result = + executeQuery( + "| multisearch [search source=opensearch-sql_test_index_time_data | where category =" + + " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data2 | where category" + + " = \\\"E\\\"] | stats avg(value) by span(@timestamp, 5m)"); + + verifySchema( + result, + schema("avg(value)", null, "double"), + schema("span(@timestamp,5m)", null, "timestamp")); + + // Each data point falls in its own 5-min bucket (all >5min apart), so avg = single value + // Category A: 26 rows from time_test_data, Category E: 10 rows from time_test_data2 + verifyDataRows( + result, + // Category A (26 rows) + rows(8945.0, "2025-07-28 00:15:00"), + rows(6834.0, "2025-07-28 03:55:00"), + rows(6589.0, "2025-07-28 07:50:00"), + rows(9367.0, "2025-07-28 11:05:00"), + rows(9245.0, "2025-07-28 15:15:00"), + rows(8917.0, "2025-07-28 19:20:00"), + rows(8384.0, "2025-07-28 23:30:00"), + rows(8798.0, "2025-07-29 03:35:00"), + rows(9306.0, "2025-07-29 07:45:00"), + rows(8873.0, "2025-07-29 11:50:00"), + rows(8542.0, "2025-07-29 15:00:00"), + rows(9321.0, "2025-07-29 19:05:00"), + rows(8917.0, "2025-07-29 23:10:00"), + rows(8756.0, "2025-07-30 03:20:00"), + rows(9234.0, "2025-07-30 07:25:00"), + rows(8679.0, "2025-07-30 11:35:00"), + rows(8765.0, "2025-07-30 15:40:00"), + rows(9187.0, "2025-07-30 19:50:00"), + rows(8862.0, "2025-07-30 23:55:00"), + rows(8537.0, "2025-07-31 03:00:00"), + rows(9318.0, "2025-07-31 07:10:00"), + rows(8914.0, "2025-07-31 11:15:00"), + rows(8753.0, "2025-07-31 15:25:00"), + rows(9231.0, "2025-07-31 19:30:00"), + rows(8676.0, "2025-07-31 23:40:00"), + rows(8762.0, "2025-08-01 03:45:00"), + // Category E (10 rows) + rows(2001.0, "2025-08-01 04:00:00"), + rows(2003.0, "2025-08-01 01:00:00"), + rows(2005.0, "2025-07-31 20:45:00"), + rows(2007.0, "2025-07-31 16:00:00"), + rows(2009.0, "2025-07-31 12:30:00"), + rows(2011.0, "2025-07-31 08:00:00"), + rows(2013.0, "2025-07-31 04:30:00"), + rows(2015.0, "2025-07-31 01:00:00"), + rows(2017.0, "2025-07-30 21:30:00"), + rows(2019.0, "2025-07-30 18:00:00")); + } + + /** Reproduce #5147: bin command after multisearch should produce non-null @timestamp. */ + @Test + public void testMultisearchBinTimestamp() throws IOException { + JSONObject result = + executeQuery( + "| multisearch [search source=opensearch-sql_test_index_time_data | where category =" + + " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data2 | where category" + + " = \\\"E\\\"] | fields @timestamp, category, value | bin @timestamp span=5m"); + + verifySchema( + result, + schema("category", null, "string"), + schema("value", null, "int"), + schema("@timestamp", null, "timestamp")); + + // bin floors @timestamp to 5-min boundaries; projectPlusOverriding moves @timestamp to end + // Category A: 26 rows from time_test_data, Category E: 10 rows from time_test_data2 + verifyDataRows( + result, + // Category A (26 rows) + rows("A", 8945, "2025-07-28 00:15:00"), + rows("A", 6834, "2025-07-28 03:55:00"), + rows("A", 6589, "2025-07-28 07:50:00"), + rows("A", 9367, "2025-07-28 11:05:00"), + rows("A", 9245, "2025-07-28 15:15:00"), + rows("A", 8917, "2025-07-28 19:20:00"), + rows("A", 8384, "2025-07-28 23:30:00"), + rows("A", 8798, "2025-07-29 03:35:00"), + rows("A", 9306, "2025-07-29 07:45:00"), + rows("A", 8873, "2025-07-29 11:50:00"), + rows("A", 8542, "2025-07-29 15:00:00"), + rows("A", 9321, "2025-07-29 19:05:00"), + rows("A", 8917, "2025-07-29 23:10:00"), + rows("A", 8756, "2025-07-30 03:20:00"), + rows("A", 9234, "2025-07-30 07:25:00"), + rows("A", 8679, "2025-07-30 11:35:00"), + rows("A", 8765, "2025-07-30 15:40:00"), + rows("A", 9187, "2025-07-30 19:50:00"), + rows("A", 8862, "2025-07-30 23:55:00"), + rows("A", 8537, "2025-07-31 03:00:00"), + rows("A", 9318, "2025-07-31 07:10:00"), + rows("A", 8914, "2025-07-31 11:15:00"), + rows("A", 8753, "2025-07-31 15:25:00"), + rows("A", 9231, "2025-07-31 19:30:00"), + rows("A", 8676, "2025-07-31 23:40:00"), + rows("A", 8762, "2025-08-01 03:45:00"), + // Category E (10 rows) + rows("E", 2001, "2025-08-01 04:00:00"), + rows("E", 2003, "2025-08-01 01:00:00"), + rows("E", 2005, "2025-07-31 20:45:00"), + rows("E", 2007, "2025-07-31 16:00:00"), + rows("E", 2009, "2025-07-31 12:30:00"), + rows("E", 2011, "2025-07-31 08:00:00"), + rows("E", 2013, "2025-07-31 04:30:00"), + rows("E", 2015, "2025-07-31 01:00:00"), + rows("E", 2017, "2025-07-30 21:30:00"), + rows("E", 2019, "2025-07-30 18:00:00")); + } + + /** Reproduce #5147 full pattern: bin + stats after multisearch. */ + @Test + public void testMultisearchBinAndStats() throws IOException { + JSONObject result = + executeQuery( + "| multisearch [search source=opensearch-sql_test_index_time_data | where category =" + + " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data2 | where category" + + " = \\\"E\\\"] | bin @timestamp span=5m | stats avg(value) by @timestamp"); + + verifySchema( + result, schema("avg(value)", null, "double"), schema("@timestamp", null, "timestamp")); + + // Each data point falls in its own 5-min bucket (all >5min apart), so avg = single value + // Category A: 26 rows from time_test_data, Category E: 10 rows from time_test_data2 + verifyDataRows( + result, + // Category A (26 rows) + rows(8945.0, "2025-07-28 00:15:00"), + rows(6834.0, "2025-07-28 03:55:00"), + rows(6589.0, "2025-07-28 07:50:00"), + rows(9367.0, "2025-07-28 11:05:00"), + rows(9245.0, "2025-07-28 15:15:00"), + rows(8917.0, "2025-07-28 19:20:00"), + rows(8384.0, "2025-07-28 23:30:00"), + rows(8798.0, "2025-07-29 03:35:00"), + rows(9306.0, "2025-07-29 07:45:00"), + rows(8873.0, "2025-07-29 11:50:00"), + rows(8542.0, "2025-07-29 15:00:00"), + rows(9321.0, "2025-07-29 19:05:00"), + rows(8917.0, "2025-07-29 23:10:00"), + rows(8756.0, "2025-07-30 03:20:00"), + rows(9234.0, "2025-07-30 07:25:00"), + rows(8679.0, "2025-07-30 11:35:00"), + rows(8765.0, "2025-07-30 15:40:00"), + rows(9187.0, "2025-07-30 19:50:00"), + rows(8862.0, "2025-07-30 23:55:00"), + rows(8537.0, "2025-07-31 03:00:00"), + rows(9318.0, "2025-07-31 07:10:00"), + rows(8914.0, "2025-07-31 11:15:00"), + rows(8753.0, "2025-07-31 15:25:00"), + rows(9231.0, "2025-07-31 19:30:00"), + rows(8676.0, "2025-07-31 23:40:00"), + rows(8762.0, "2025-08-01 03:45:00"), + // Category E (10 rows) + rows(2001.0, "2025-08-01 04:00:00"), + rows(2003.0, "2025-08-01 01:00:00"), + rows(2005.0, "2025-07-31 20:45:00"), + rows(2007.0, "2025-07-31 16:00:00"), + rows(2009.0, "2025-07-31 12:30:00"), + rows(2011.0, "2025-07-31 08:00:00"), + rows(2013.0, "2025-07-31 04:30:00"), + rows(2015.0, "2025-07-31 01:00:00"), + rows(2017.0, "2025-07-30 21:30:00"), + rows(2019.0, "2025-07-30 18:00:00")); + } + @Test public void testMultisearchTypeConflictWithStats() { Exception exception = From 81c0d3feec043f39fc7a8d524e5a461ebc4de055 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 17 Feb 2026 14:36:46 -0800 Subject: [PATCH 2/3] fix IT Signed-off-by: Kai Huang --- .../sql/calcite/remote/CalcitePPLAppendCommandIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java index d01ddfb2a44..6372b818b2b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java @@ -253,7 +253,7 @@ public void testAppendSchemaMergeWithTimestampUDT() throws IOException { schema("account_number", "bigint"), schema("firstname", "string"), schema("age", "int"), - schema("birthdate", "string")); + schema("birthdate", "timestamp")); verifyDataRows(actual, rows(32, null, 34, "2018-08-11 00:00:00")); } From bb026a5573e6032419cf3d7e8004ecb9c21dd87f Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 17 Feb 2026 15:45:07 -0800 Subject: [PATCH 3/3] add UT Signed-off-by: Kai Huang --- .../calcite/utils/OpenSearchTypeFactory.java | 16 ++- .../utils/OpenSearchTypeFactoryTest.java | 131 ++++++++++++++++++ 2 files changed, 143 insertions(+), 4 deletions(-) create mode 100644 core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index f95215592e5..1edf6807b0a 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -380,10 +380,18 @@ public static boolean isNumericType(RelDataType fieldType) { /** * Preserves OpenSearch UDT types through set operations (UNION, INTERSECT, EXCEPT). When all - * input types share the same UDT (e.g., all are EXPR_TIMESTAMP), the result retains the UDT - * wrapper instead of being downgraded to the underlying SQL type (e.g., VARCHAR). This is - * critical for operations like multisearch that use UNION ALL, where downstream operators (bin, - * span) rely on the UDT type to determine how to process the field. + * input types share the same {@link AbstractExprRelDataType} with the same {@link + * AbstractExprRelDataType#getUdt() UDT}, the result retains the UDT wrapper instead of being + * downgraded to the underlying SQL type (e.g., VARCHAR). This is critical for operations like + * multisearch that use UNION ALL, where downstream operators (bin, span) rely on the UDT type to + * determine how to process the field. When inputs include non-UDT types or different UDTs, this + * method falls back to {@link super#leastRestrictive}. + * + * @param types the list of input {@link RelDataType} instances to find the least restrictive + * common type for + * @return the least restrictive {@link RelDataType} preserving the UDT wrapper when all inputs + * share the same UDT, or {@code null} if no common type exists (as determined by {@link + * super#leastRestrictive}) */ @Override public @Nullable RelDataType leastRestrictive(List types) { diff --git a/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java new file mode 100644 index 00000000000..99d232a076a --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java @@ -0,0 +1,131 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; + +import java.util.List; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.calcite.type.AbstractExprRelDataType; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; + +public class OpenSearchTypeFactoryTest { + + @Test + public void testLeastRestrictivePreservesUdtWhenAllInputsSameUdt() { + RelDataType ts1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType ts2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(ts1, ts2)); + + assertNotNull(result); + assertInstanceOf(AbstractExprRelDataType.class, result); + assertEquals(ExprUDT.EXPR_TIMESTAMP, ((AbstractExprRelDataType) result).getUdt()); + } + + @Test + public void testLeastRestrictivePreservesUdtForDateType() { + RelDataType d1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType d2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(d1, d2)); + + assertNotNull(result); + assertInstanceOf(AbstractExprRelDataType.class, result); + assertEquals(ExprUDT.EXPR_DATE, ((AbstractExprRelDataType) result).getUdt()); + } + + @Test + public void testLeastRestrictivePreservesUdtForThreeInputs() { + RelDataType ts1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType ts2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType ts3 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(ts1, ts2, ts3)); + + assertNotNull(result); + assertInstanceOf(AbstractExprRelDataType.class, result); + assertEquals(ExprUDT.EXPR_TIMESTAMP, ((AbstractExprRelDataType) result).getUdt()); + } + + @Test + public void testLeastRestrictiveReturnsNullableWhenAnyInputIsNullable() { + RelDataType nonNullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, false); + RelDataType nullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, true); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(nonNullable, nullable)); + + assertNotNull(result); + assertInstanceOf(AbstractExprRelDataType.class, result); + assertEquals(ExprUDT.EXPR_TIMESTAMP, ((AbstractExprRelDataType) result).getUdt()); + assertTrue(result.isNullable()); + } + + @Test + public void testLeastRestrictiveReturnsNullableWhenFirstNullableSecondNot() { + RelDataType nullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, true); + RelDataType nonNullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, false); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(nullable, nonNullable)); + + assertNotNull(result); + assertInstanceOf(AbstractExprRelDataType.class, result); + assertTrue(result.isNullable()); + } + + @Test + public void testLeastRestrictiveFallsBackForMixedUdtAndNonUdt() { + RelDataType udt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType plain = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(udt, plain)); + + // Falls back to super.leastRestrictive which may return a plain type or null + if (result != null) { + assertEquals(SqlTypeName.VARCHAR, result.getSqlTypeName()); + } + } + + @Test + public void testLeastRestrictiveFallsBackForDifferentUdts() { + RelDataType timestamp = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType date = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(timestamp, date)); + + // Different UDTs — falls back to super.leastRestrictive + if (result != null) { + assertEquals(SqlTypeName.VARCHAR, result.getSqlTypeName()); + } + } + + @Test + public void testLeastRestrictiveDelegatesToSuperForSingleType() { + RelDataType single = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(single)); + + assertNotNull(result); + assertEquals(SqlTypeName.INTEGER, result.getSqlTypeName()); + } + + @Test + public void testLeastRestrictiveDelegatesToSuperForPlainTypes() { + RelDataType int1 = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType int2 = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(int1, int2)); + + assertNotNull(result); + assertEquals(SqlTypeName.INTEGER, result.getSqlTypeName()); + } +}