From 004913bb7fbf6bed919a174908c28402713ff821 Mon Sep 17 00:00:00 2001
From: kokila-19 <kokilanarayanan95@gmail.com>
Date: Fri, 17 Apr 2026 14:30:50 +0530
Subject: [PATCH] HIVE-29570: Fix MERGE rewrite parse failure by correctly
 quoting qualified column names that match function names (like date)

---
 .../hadoop/hive/ql/metadata/HiveUtils.java    |  26 +-
 ql/src/test/queries/clientpositive/sqlmerge.q |  12 +
 .../clientpositive/llap/sqlmerge.q.out        | 279 ++++++++++++++++++
 .../clientpositive/tez/explainanalyze_3.q.out |   4 +-
 4 files changed, 316 insertions(+), 5 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java
index a75349bf5444..d9fcc597a858 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java
@@ -31,7 +31,6 @@
 import com.google.common.collect.SetMultimap;
 import org.antlr.runtime.TokenRewriteStream;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.lib.CostLessRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
@@ -354,14 +353,35 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Obje
             throws SemanticException {
       UnparseTranslator unparseTranslator = ((QuotedIdExpressionContext)procCtx).getUnparseTranslator();
       ASTNode identifier = (ASTNode) nd;
-      String id = identifier.getText();
-      if (FunctionRegistry.getFunctionInfo(id) != null){
+      if (isFunctionNameToken(identifier)) {
         return null;
       }
 
       unparseTranslator.addIdentifierTranslation(identifier);
       return null;
     }
+
+    /**
+     * Tells whether an identifier token is the name of a function call, i.e.
+     * the first child of a TOK_FUNCTION, TOK_FUNCTIONDI or TOK_FUNCTIONSTAR
+     * node. Only such tokens are skipped when quoting identifiers during
+     * unparse; a column whose name matches a function is still quoted, e.g.
+     * `alias`.`date` instead of `alias`.date.
+     */
+    private static boolean isFunctionNameToken(ASTNode identifier) {
+      ASTNode owner = (ASTNode) identifier.getParent();
+      if (owner == null || owner.getChildCount() == 0) {
+        return false;
+      }
+      switch (owner.getType()) {
+        case HiveParser.TOK_FUNCTION:
+        case HiveParser.TOK_FUNCTIONDI:
+        case HiveParser.TOK_FUNCTIONSTAR:
+          return owner.getChild(0) == identifier;
+        default:
+          return false;
+      }
+    }
   }
 
   static class QuotedIdExpressionContext implements NodeProcessorCtx {
diff --git a/ql/src/test/queries/clientpositive/sqlmerge.q b/ql/src/test/queries/clientpositive/sqlmerge.q
index 412099d820a2..baf35cb4d232 100644
--- a/ql/src/test/queries/clientpositive/sqlmerge.q
+++ b/ql/src/test/queries/clientpositive/sqlmerge.q
@@ -22,3 +22,15 @@ explain merge into acidTbl_n0 as t using nonAcidOrcTbl_n0 s ON t.a = s.a
 WHEN MATCHED AND s.a > 8 THEN DELETE
 WHEN MATCHED THEN UPDATE SET b = 7
 WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b);
+
+-- MERGE rewrite must preserve quoting for qualified identifiers like `count`.`date` when the column name is also a function name
+drop table if exists `count`;
+drop table if exists tgt_table;
+create table `count`(a int, `date` int) clustered by (a) into 2 buckets stored as orc
+  TBLPROPERTIES ('transactional'='true');
+create table tgt_table(a int, `date` int) clustered by (a) into 2 buckets stored as orc
+  TBLPROPERTIES ('transactional'='true');
+
+explain merge into tgt_table using `count` ON tgt_table.a = `count`.a
+WHEN MATCHED THEN UPDATE SET `date` = `count`.`date`
+WHEN NOT MATCHED THEN INSERT VALUES(`count`.a, `count`.`date`);
diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
index 807948e470f4..f2727b937a53 100644
--- a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
+++ b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
@@ -819,3 +819,282 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 
+PREHOOK: query: drop table if exists `count`
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists `count`
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: drop table if exists tgt_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists tgt_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: create table `count`(a int, `date` int) clustered by (a) into 2 buckets stored as orc
+  TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@count
+POSTHOOK: query: create table `count`(a int, `date` int) clustered by (a) into 2 buckets stored as orc
+  TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@count
+PREHOOK: query: create table tgt_table(a int, `date` int) clustered by (a) into 2 buckets stored as orc
+  TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tgt_table
+POSTHOOK: query: create table tgt_table(a int, `date` int) clustered by (a) into 2 buckets stored as orc
+  TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tgt_table
+PREHOOK: query: explain merge into tgt_table using `count` ON tgt_table.a = `count`.a
+WHEN MATCHED THEN UPDATE SET `date` = `count`.`date`
+WHEN NOT MATCHED THEN INSERT VALUES(`count`.a, `count`.`date`)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@count
+PREHOOK: Input: default@tgt_table
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@tgt_table
+PREHOOK: Output: default@tgt_table
+POSTHOOK: query: explain merge into tgt_table using `count` ON tgt_table.a = `count`.a
+WHEN MATCHED THEN UPDATE SET `date` = `count`.`date`
+WHEN NOT MATCHED THEN INSERT VALUES(`count`.a, `count`.`date`)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@count
+POSTHOOK: Input: default@tgt_table
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@tgt_table
+POSTHOOK: Output: default@tgt_table
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-4 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-4
+  Stage-6 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-4
+  Stage-7 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-3
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: count
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), date (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: tgt_table
+                  filterExpr: a is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), a (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: int)
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col1 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col3 (type: int), _col1 (type: int), _col2 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: _col1 is null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: int), _col2 (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: a
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int)
+                  Filter Operator
+                    predicate: (_col1 = _col0) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col1 (type: int), _col2 (type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int), _col2 (type: int)
+                  Filter Operator
+                    predicate: (_col1 = _col0) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+                      outputColumnNames: _col3
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count()
+                        keys: _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+                        minReductionHashAggr: 0.99
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+                          null sort order: z
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.tgt_table
+                  Write Type: INSERT
+        Reducer 4 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), VALUE._col1 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.tgt_table
+                  Write Type: UPDATE
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col1 > 1L) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cardinality_violation(_col0) (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.merge_tmp_table
+
+  Stage: Stage-4
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.tgt_table
+          Write Type: INSERT
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.tgt_table
+          Write Type: UPDATE
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+
+  Stage: Stage-2
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.merge_tmp_table
+
+  Stage: Stage-7
+    Stats Work
+      Basic Stats Work:
+
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
index 980d6cc6c78a..1f065c1ba74a 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
@@ -375,10 +375,10 @@ Stage-0
         Number of rows:1
         TableScan [TS_0]
 
-PREHOOK: query: DROP TEMPORARY MACRO SIGMOID
+PREHOOK: query: DROP TEMPORARY MACRO `SIGMOID`
 PREHOOK: type: DROPMACRO
 PREHOOK: Output: database:default
-POSTHOOK: query: DROP TEMPORARY MACRO SIGMOID
+POSTHOOK: query: DROP TEMPORARY MACRO `SIGMOID`
 POSTHOOK: type: DROPMACRO
 POSTHOOK: Output: database:default
 PREHOOK: query: explain analyze DROP TEMPORARY MACRO SIGMOID