From 004913bb7fbf6bed919a174908c28402713ff821 Mon Sep 17 00:00:00 2001 From: kokila-19 Date: Fri, 17 Apr 2026 14:30:50 +0530 Subject: [PATCH] HIVE-29570: Fix MERGE rewrite parse failure by correctly quoting qualified column names when they match function names (like date) --- .../hadoop/hive/ql/metadata/HiveUtils.java | 26 +- ql/src/test/queries/clientpositive/sqlmerge.q | 12 + .../clientpositive/llap/sqlmerge.q.out | 279 ++++++++++++++++++ .../clientpositive/tez/explainanalyze_3.q.out | 4 +- 4 files changed, 316 insertions(+), 5 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java index a75349bf5444..d9fcc597a858 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java @@ -31,7 +31,6 @@ import com.google.common.collect.SetMultimap; import org.antlr.runtime.TokenRewriteStream; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.lib.CostLessRuleDispatcher; import org.apache.hadoop.hive.ql.lib.ExpressionWalker; import org.apache.hadoop.hive.ql.lib.Node; @@ -354,14 +353,35 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje throws SemanticException { UnparseTranslator unparseTranslator = ((QuotedIdExpressionContext)procCtx).getUnparseTranslator(); ASTNode identifier = (ASTNode) nd; - String id = identifier.getText(); - if (FunctionRegistry.getFunctionInfo(id) != null){ + if (isFunctionNameToken(identifier)) { return null; } unparseTranslator.addIdentifierTranslation(identifier); return null; } + + /* + * Quote identifiers during unparse. + * + * Only skip quoting for function names. + * Always quote column names, even if they match function names. + * For example, use `alias`.`date` instead of `alias`.date. 
+ */ + private static boolean isFunctionNameToken(ASTNode identifier) { + ASTNode parent = (ASTNode) identifier.getParent(); + if (parent == null || parent.getChildCount() == 0) { + return false; + } + + int parentType = parent.getType(); + boolean isFunctionNode = + parentType == HiveParser.TOK_FUNCTION + || parentType == HiveParser.TOK_FUNCTIONDI + || parentType == HiveParser.TOK_FUNCTIONSTAR; + + return isFunctionNode && parent.getChild(0) == identifier; + } } static class QuotedIdExpressionContext implements NodeProcessorCtx { diff --git a/ql/src/test/queries/clientpositive/sqlmerge.q b/ql/src/test/queries/clientpositive/sqlmerge.q index 412099d820a2..baf35cb4d232 100644 --- a/ql/src/test/queries/clientpositive/sqlmerge.q +++ b/ql/src/test/queries/clientpositive/sqlmerge.q @@ -22,3 +22,15 @@ explain merge into acidTbl_n0 as t using nonAcidOrcTbl_n0 s ON t.a = s.a WHEN MATCHED AND s.a > 8 THEN DELETE WHEN MATCHED THEN UPDATE SET b = 7 WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b); + +-- MERGE rewrite must preserve quoting for qualified identifiers like s.`date` when column name is function keyword +drop table if exists `count`; +drop table if exists tgt_table; +create table `count`(a int, `date` int) clustered by (a) into 2 buckets stored as orc + TBLPROPERTIES ('transactional'='true'); +create table tgt_table(a int, `date` int) clustered by (a) into 2 buckets stored as orc + TBLPROPERTIES ('transactional'='true'); + +explain merge into tgt_table using `count` ON tgt_table.a = `count`.a +WHEN MATCHED THEN UPDATE SET `date` = `count`.`date` +WHEN NOT MATCHED THEN INSERT VALUES(`count`.a, `count`.`date`); diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 807948e470f4..f2727b937a53 100644 --- a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -819,3 +819,282 @@ STAGE PLANS: Stats Work Basic Stats Work: +PREHOOK: 
query: drop table if exists `count` +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: drop table if exists `count` +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default +PREHOOK: query: drop table if exists tgt_table +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: drop table if exists tgt_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default +PREHOOK: query: create table `count`(a int, `date` int) clustered by (a) into 2 buckets stored as orc + TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@count +POSTHOOK: query: create table `count`(a int, `date` int) clustered by (a) into 2 buckets stored as orc + TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@count +PREHOOK: query: create table tgt_table(a int, `date` int) clustered by (a) into 2 buckets stored as orc + TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tgt_table +POSTHOOK: query: create table tgt_table(a int, `date` int) clustered by (a) into 2 buckets stored as orc + TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tgt_table +PREHOOK: query: explain merge into tgt_table using `count` ON tgt_table.a = `count`.a +WHEN MATCHED THEN UPDATE SET `date` = `count`.`date` +WHEN NOT MATCHED THEN INSERT VALUES(`count`.a, `count`.`date`) +PREHOOK: type: QUERY +PREHOOK: Input: default@count +PREHOOK: Input: default@tgt_table +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@tgt_table +PREHOOK: Output: default@tgt_table +POSTHOOK: query: explain merge into tgt_table using `count` ON tgt_table.a = `count`.a +WHEN MATCHED THEN UPDATE SET `date` = `count`.`date` +WHEN NOT MATCHED THEN INSERT 
VALUES(`count`.a, `count`.`date`) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@count +POSTHOOK: Input: default@tgt_table +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@tgt_table +POSTHOOK: Output: default@tgt_table +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: count + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), date (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 6 + Map Operator Tree: + TableScan + alias: tgt_table + filterExpr: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID (type: struct), a (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int), _col1 (type: int), _col2 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col1 = _col0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: 
UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Filter Operator + predicate: (_col1 = _col0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct) + outputColumnNames: _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col3 (type: struct) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tgt_table + Write Type: INSERT + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tgt_table + Write Type: UPDATE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tgt_table + Write Type: INSERT + + Stage: Stage-5 + Stats Work + Basic Stats Work: + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tgt_table + Write Type: UPDATE + + Stage: Stage-6 + Stats Work + Basic Stats Work: + + 
Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-7 + Stats Work + Basic Stats Work: + diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index 980d6cc6c78a..1f065c1ba74a 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -375,10 +375,10 @@ Stage-0 Number of rows:1 TableScan [TS_0] -PREHOOK: query: DROP TEMPORARY MACRO SIGMOID +PREHOOK: query: DROP TEMPORARY MACRO `SIGMOID` PREHOOK: type: DROPMACRO PREHOOK: Output: database:default -POSTHOOK: query: DROP TEMPORARY MACRO SIGMOID +POSTHOOK: query: DROP TEMPORARY MACRO `SIGMOID` POSTHOOK: type: DROPMACRO POSTHOOK: Output: database:default PREHOOK: query: explain analyze DROP TEMPORARY MACRO SIGMOID