diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 10c5d2aa88..8095fc73f9 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -913,6 +913,9 @@ public RelNode visitTranspose( RelBuilder b = context.relBuilder; RexBuilder rx = context.rexBuilder; RelDataType varchar = rx.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); + int axisLiteralLength = fieldNames.stream().mapToInt(String::length).max().orElse(0); + RelDataType axisLiteralType = + rx.getTypeFactory().createSqlType(SqlTypeName.CHAR, axisLiteralLength); // Step 1: ROW_NUMBER b.projectPlus( @@ -930,18 +933,22 @@ public RelNode visitTranspose( .map( f -> Map.entry( - ImmutableList.of(rx.makeLiteral(f)), + ImmutableList.of( + (RexLiteral) rx.makeLiteral(f, axisLiteralType, false, false)), ImmutableList.of((RexNode) rx.makeCast(varchar, b.field(f), true)))) .collect(Collectors.toList())); // Step 3: Trim spaces from columnName column before pivot RexNode trimmedColumnName = - context.rexBuilder.makeCall( - SqlStdOperatorTable.TRIM, - context.rexBuilder.makeFlag(SqlTrimFunction.Flag.BOTH), - context.rexBuilder.makeLiteral(" "), - b.field(columnName)); + context.rexBuilder.makeCast( + varchar, + context.rexBuilder.makeCall( + SqlStdOperatorTable.TRIM, + context.rexBuilder.makeFlag(SqlTrimFunction.Flag.BOTH), + context.rexBuilder.makeLiteral(" "), + b.field(columnName)), + true); // Step 4: PIVOT b.pivot( diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml index d0a2f80d86..903980bed9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml @@ -3,20 +3,21 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4]) LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) - LogicalProject(_value_transpose_=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)]) + LogicalProject(_value_transpose_=[CAST($19):VARCHAR NOT NULL], $f20=[CAST(TRIM(FLAG(BOTH), ' ', $18)):VARCHAR NOT NULL], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)]) LogicalFilter(condition=[IS NOT NULL($19)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], _value_transpose_=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], _value_transpose_=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname '), CAST($1):VARCHAR NOT NULL, =($18, 'address '), CAST($2):VARCHAR NOT NULL, =($18, 'balance '), CAST($3):VARCHAR NOT NULL, =($18, 'gender '), CAST($4):VARCHAR NOT NULL, =($18, 'city '), CAST($5):VARCHAR NOT NULL, =($18, 'employer '), CAST($6):VARCHAR NOT NULL, =($18, 'state '), CAST($7):VARCHAR NOT NULL, =($18, 'age '), CAST($8):VARCHAR NOT NULL, =($18, 'email '), CAST($9):VARCHAR NOT NULL, =($18, 'lastname '), CAST($10):VARCHAR NOT NULL, null:NULL)]) LogicalJoin(condition=[true], joinType=[inner]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[ROW_NUMBER() OVER ()]) LogicalSort(fetch=[5]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }]]) + LogicalValues(tuples=[[{ 'account_number' }, { 'firstname ' }, { 'address ' }, { 'balance ' }, { 'gender ' }, { 'city ' }, { 'employer ' }, { 'state ' }, { 'age ' }, { 'email ' }, { 'lastname ' }]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) - EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], _value_transpose_=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname '], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address '], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance '], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender '], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city '], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer '], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state '], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age '], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email '], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname '], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[CAST($t51):VARCHAR NOT NULL], expr#53=[1], expr#54=[=($t11, $t53)], expr#55=[2], expr#56=[=($t11, $t55)], expr#57=[3], expr#58=[=($t11, $t57)], expr#59=[4], expr#60=[=($t11, $t59)], _value_transpose_=[$t48], $f20=[$t52], $f21=[$t54], $f22=[$t56], $f23=[$t58], $f24=[$t60]) EnumerableNestedLoopJoin(condition=[true], joinType=[inner]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) - EnumerableCalc(expr#0=[{inputs}], expr#1=[Sarg['account_number', 'address':CHAR(14), 'age':CHAR(14), 'balance':CHAR(14), 'city':CHAR(14), 'email':CHAR(14), 'employer':CHAR(14), 'firstname':CHAR(14), 'gender':CHAR(14), 'lastname':CHAR(14), 'state':CHAR(14)]:CHAR(14)], expr#2=[SEARCH($t0, $t1)], column_names=[$t0], $condition=[$t2]) - EnumerableValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }]]) + EnumerableCalc(expr#0=[{inputs}], expr#1=[Sarg['account_number', 'address ', 'age ', 'balance ', 'city ', 'email ', 'employer ', 'firstname ', 'gender ', 'lastname ', 'state ']:CHAR(14)], expr#2=[SEARCH($t0, $t1)], column_names=[$t0], $condition=[$t2]) + EnumerableValues(tuples=[[{ 'account_number' }, { 'firstname ' }, { 'address ' }, { 'balance ' }, { 'gender ' }, { 'city ' }, { 'employer ' }, { 'state ' }, { 'age ' }, { 'email ' }, { 'lastname ' }]]) + diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java index 69bc1ae263..a9be30cb14 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java @@ -6,7 +6,10 @@ package org.opensearch.sql.ppl.calcite; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.test.CalciteAssert; +import org.junit.Assert; import org.junit.Test; public class CalcitePPLTransposeTest extends CalcitePPLAbstractTest { @@ -25,8 +28,8 @@ public void testSimpleCountWithTranspose() { + " FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5], row" + " 5_null=[MAX($0) FILTER $6])\n" + " LogicalProject(_value_transpose_=[CAST($3):VARCHAR NOT NULL]," - + " $f4=[TRIM(FLAG(BOTH), ' '," - + " $2)], $f5=[=($1, 1)], $f6=[=($1, 2)], $f7=[=($1, 3)], $f8=[=($1, 4)], $f9=[=($1," + + " $f4=[CAST(TRIM(FLAG(BOTH), ' ', $2)):VARCHAR NOT NULL], $f5=[=($1, 1)]," + + " $f6=[=($1, 2)], $f7=[=($1, 3)], $f8=[=($1, 4)], $f9=[=($1," + " 5)])\n" + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + " LogicalProject(c=[$0], _row_number_transpose_=[$1], column=[$2]," @@ -41,16 +44,13 @@ public void testSimpleCountWithTranspose() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column`) `column`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_` AS STRING))" - + " FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`_value_transpose_` AS STRING))" - + " FILTER (WHERE" - + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`_value_transpose_` AS STRING))" - + " FILTER (WHERE" + "SELECT CAST(TRIM(`column`) AS STRING) `column`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_`" + + " AS STRING)) FILTER (WHERE `_row_number_transpose_` = 2) `row 2`," + + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE `_row_number_transpose_` =" + + " 3) `row 3`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`_value_transpose_` AS STRING))" - + " FILTER (WHERE" - + " `_row_number_transpose_` = 5) `row 5`\n" + + " FILTER (WHERE `_row_number_transpose_` = 5) `row 5`\n" + "FROM (SELECT `t0`.`c`, `t0`.`_row_number_transpose_`, `t1`.`column`, CASE WHEN" + " `t1`.`column` = 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END" + " `_value_transpose_`\n" @@ -58,7 +58,7 @@ public void testSimpleCountWithTranspose() { + "FROM `scott`.`EMP`) `t0`\n" + "CROSS JOIN (VALUES ('c')) `t1` (`column`)) `t2`\n" + "WHERE `t2`.`_value_transpose_` IS NOT NULL\n" - + "GROUP BY TRIM(`column`)"; + + "GROUP BY CAST(TRIM(`column`) AS STRING)"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -75,14 +75,14 @@ public void testMultipleAggregatesWithAliasesTranspose() { + " FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5], row" + " 5_null=[MAX($0) FILTER $6])\n" + " LogicalProject(_value_transpose_=[CAST($6):VARCHAR NOT NULL]," - + " $f7=[TRIM(FLAG(BOTH), ' '," - + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)], $f11=[=($4, 4)]," + + " $f7=[CAST(TRIM(FLAG(BOTH), ' ', $5)):VARCHAR NOT NULL], $f8=[=($4, 1)]," + + " $f9=[=($4, 2)], $f10=[=($4, 3)], $f11=[=($4, 4)]," + " $f12=[=($4, 5)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + " _row_number_transpose_=[$4], column=[$5], _value_transpose_=[CASE(=($5, 'avg_sal')," + " NUMBER_TO_STRING($0), =($5, 'max_sal'), NUMBER_TO_STRING($1), =($5, 'min_sal')," - + " NUMBER_TO_STRING($2), =($5, 'cnt'), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" + + " NUMBER_TO_STRING($2), =($5, 'cnt '), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" @@ -91,7 +91,7 @@ public void testMultipleAggregatesWithAliasesTranspose() { + " LogicalProject(SAL=[$5])\n" + " LogicalTableScan(table=[[scott, EMP]])\n" + " LogicalValues(tuples=[[{ 'avg_sal' }, { 'max_sal' }, { 'min_sal' }, {" - + " 'cnt' }]])\n"; + + " 'cnt ' }]])\n"; verifyLogical(root, expectedLogical); String expectedResult = "column=avg_sal; row 1=2073.214285; row 2=null; row 3=null; row 4=null; row 5=null\n" @@ -102,31 +102,28 @@ public void testMultipleAggregatesWithAliasesTranspose() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column`) `column`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_` AS STRING))" - + " FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`_value_transpose_` AS STRING))" - + " FILTER (WHERE" - + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`_value_transpose_` AS STRING))" - + " FILTER (WHERE" + "SELECT CAST(TRIM(`column`) AS STRING) `column`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_`" + + " AS STRING)) FILTER (WHERE `_row_number_transpose_` = 2) `row 2`," + + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE `_row_number_transpose_` =" + + " 3) `row 3`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`_value_transpose_` AS STRING))" - + " FILTER (WHERE" - + " `_row_number_transpose_` = 5) `row 5`\n" + + " FILTER (WHERE `_row_number_transpose_` = 5) `row 5`\n" + "FROM (SELECT `t1`.`avg_sal`, `t1`.`max_sal`, `t1`.`min_sal`, `t1`.`cnt`," + " `t1`.`_row_number_transpose_`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal'" + " THEN NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + " NUMBER_TO_STRING(`t1`.`max_sal`) WHEN `t2`.`column` = 'min_sal' THEN" - + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN CAST(`t1`.`cnt` AS" - + " STRING) ELSE NULL END `_value_transpose_`\n" + + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt ' THEN" + + " CAST(`t1`.`cnt` AS STRING) ELSE NULL END `_value_transpose_`\n" + "FROM (SELECT AVG(`SAL`) `avg_sal`, MAX(`SAL`) `max_sal`, MIN(`SAL`) `min_sal`," + " COUNT(*) `cnt`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t1`\n" + "CROSS JOIN (VALUES ('avg_sal'),\n" + "('max_sal'),\n" + "('min_sal'),\n" - + "('cnt')) `t2` (`column`)) `t3`\n" + + "('cnt ')) `t2` (`column`)) `t3`\n" + "WHERE `t3`.`_value_transpose_` IS NOT NULL\n" - + "GROUP BY TRIM(`column`)"; + + "GROUP BY CAST(TRIM(`column`) AS STRING)"; /* "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" @@ -164,18 +161,18 @@ public void testTransposeWithLimit() { + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + " FILTER $3], row 3_null=[MAX($0) FILTER $4])\n" + " LogicalProject(_value_transpose_=[CAST($6):VARCHAR NOT NULL]," - + " $f7=[TRIM(FLAG(BOTH), ' '," - + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + + " $f7=[CAST(TRIM(FLAG(BOTH), ' ', $5)):VARCHAR NOT NULL], $f8=[=($4, 1)]," + + " $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," + " _row_number_transpose_=[$4], column=[$5], _value_transpose_=[CASE(=($5, 'ENAME')," - + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," - + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM '), NUMBER_TO_STRING($1), =($5, 'JOB ')," + + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL '), NUMBER_TO_STRING($3), null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL'" + + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM ' }, { 'JOB ' }, { 'SAL '" + " }]])\n"; verifyLogical(root, expectedLogical); @@ -188,31 +185,39 @@ public void testTransposeWithLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column`) `column`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_` AS STRING))" - + " FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`_value_transpose_` AS STRING))" - + " FILTER (WHERE" - + " `_row_number_transpose_` = 3) `row 3`\n" + "SELECT CAST(TRIM(`column`) AS STRING) `column`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_`" + + " AS STRING)) FILTER (WHERE `_row_number_transpose_` = 2) `row 2`," + + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE `_row_number_transpose_` =" + + " 3) `row 3`\n" + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + " `t`.`_row_number_transpose_`, `t0`.`column`, CASE WHEN `t0`.`column` = 'ENAME' THEN" - + " CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column` = 'COMM' THEN" - + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` = 'JOB' THEN CAST(`t`.`JOB` AS" - + " STRING) WHEN `t0`.`column` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END" + + " CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column` = 'COMM ' THEN" + + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` = 'JOB ' THEN CAST(`t`.`JOB` AS" + + " STRING) WHEN `t0`.`column` = 'SAL ' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END" + " `_value_transpose_`\n" + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + " `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t`\n" + "CROSS JOIN (VALUES ('ENAME'),\n" - + "('COMM'),\n" - + "('JOB'),\n" - + "('SAL')) `t0` (`column`)) `t1`\n" + + "('COMM '),\n" + + "('JOB '),\n" + + "('SAL ')) `t0` (`column`)) `t1`\n" + "WHERE `t1`.`_value_transpose_` IS NOT NULL\n" - + "GROUP BY TRIM(`column`)"; + + "GROUP BY CAST(TRIM(`column`) AS STRING)"; verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testTransposeColumnAxisUsesUnboundedVarchar() { + RelNode root = getRelNode("source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3"); + RelDataType columnType = root.getRowType().getFieldList().get(0).getType(); + + Assert.assertEquals(SqlTypeName.VARCHAR, columnType.getSqlTypeName()); + Assert.assertEquals(RelDataType.PRECISION_NOT_SPECIFIED, columnType.getPrecision()); + } + @Test public void testTransposeWithValueFieldNameCollision() { // Reproduce issue #5172: hardcoded 'value' unpivot column collides with @@ -237,19 +242,19 @@ public void testTransposeWithLimitColumnName() { + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + " FILTER $3], row 3_null=[MAX($0) FILTER $4])\n" + " LogicalProject(_value_transpose_=[CAST($6):VARCHAR NOT NULL]," - + " $f7=[TRIM(FLAG(BOTH), ' '," - + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + + " $f7=[CAST(TRIM(FLAG(BOTH), ' ', $5)):VARCHAR NOT NULL], $f8=[=($4, 1)]," + + " $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," + " _row_number_transpose_=[$4], column_names=[$5]," + " _value_transpose_=[CASE(=($5, 'ENAME')," - + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," - + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM '), NUMBER_TO_STRING($1), =($5, 'JOB ')," + + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL '), NUMBER_TO_STRING($3), null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL'" + + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM ' }, { 'JOB ' }, { 'SAL '" + " }]])\n"; verifyLogical(root, expectedLogical); @@ -261,7 +266,7 @@ public void testTransposeWithLimitColumnName() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column_names`) `column_names`," + "SELECT CAST(TRIM(`column_names`) AS STRING) `column_names`," + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + " `_row_number_transpose_` = 1) `row 1`," + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" @@ -270,19 +275,19 @@ public void testTransposeWithLimitColumnName() { + " `_row_number_transpose_` = 3) `row 3`\n" + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + " `t`.`_row_number_transpose_`, `t0`.`column_names`, CASE WHEN `t0`.`column_names` =" - + " 'ENAME' THEN CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN" - + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column_names` = 'JOB' THEN CAST(`t`.`JOB`" - + " AS STRING) WHEN `t0`.`column_names` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE" + + " 'ENAME' THEN CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column_names` = 'COMM ' THEN" + + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column_names` = 'JOB ' THEN CAST(`t`.`JOB`" + + " AS STRING) WHEN `t0`.`column_names` = 'SAL ' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE" + " NULL END `_value_transpose_`\n" + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + " `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t`\n" + "CROSS JOIN (VALUES ('ENAME'),\n" - + "('COMM'),\n" - + "('JOB'),\n" - + "('SAL')) `t0` (`column_names`)) `t1`\n" + + "('COMM '),\n" + + "('JOB '),\n" + + "('SAL ')) `t0` (`column_names`)) `t1`\n" + "WHERE `t1`.`_value_transpose_` IS NOT NULL\n" - + "GROUP BY TRIM(`column_names`)"; + + "GROUP BY CAST(TRIM(`column_names`) AS STRING)"; verifyPPLToSparkSQL(root, expectedSparkSql); }