From bce7d88fae089863bac162b872014ab74194e1ca Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Sat, 18 Apr 2026 18:08:16 +0530 Subject: [PATCH 1/2] HIVE-29551: Avoid quadratic runtime in ColumnStatsSemanticAnalyzer#getColumnTypes --- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 023934d9eb24..0a2cd9750fcb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -24,6 +24,7 @@ import com.google.common.base.Preconditions; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -205,23 +206,28 @@ private static String getColTypeOf(Table tbl, String partKey) { protected static List getColumnTypes(Table tbl, List colNames) { List colTypes = new ArrayList<>(); List cols = tbl.getCols(); - List copyColNames = new ArrayList<>(colNames); - - for (String colName : copyColNames) { - for (FieldSchema col : cols) { - if (colName.equalsIgnoreCase(col.getName())) { - String type = col.getType(); - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); - if (typeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) { - logTypeWarning(colName, type); - colNames.remove(colName); - } else { - colTypes.add(type); - } + Map colTypeMap = new HashMap<>(); + + for (FieldSchema col : cols) { + colTypeMap.put(col.getName().toLowerCase(), col.getType()); + } + + List nonPrimColNames = new ArrayList<>(); + for (String colName : colNames) { + String type = colTypeMap.get(colName.toLowerCase()); + if (type != null) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); + if (typeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) { + logTypeWarning(colName, type); + } else { + nonPrimColNames.add(colName); + colTypes.add(type); } } } + colNames.clear(); + colNames.addAll(nonPrimColNames); return colTypes; } From 4a6804d75a2d0bf084dd87b1315f657be7ce3d2a Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Sun, 19 Apr 2026 13:03:02 +0530 Subject: [PATCH 2/2] Update the wrong column name used --- .../hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 0a2cd9750fcb..93f63751cc8b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -212,7 +212,7 @@ protected static List getColumnTypes(Table tbl, List colNames) { colTypeMap.put(col.getName().toLowerCase(), col.getType()); } - List nonPrimColNames = new ArrayList<>(); + List primColNames = new ArrayList<>(); for (String colName : colNames) { String type = colTypeMap.get(colName.toLowerCase()); if (type != null) { @@ -220,14 +220,14 @@ protected static List getColumnTypes(Table tbl, List colNames) { if (typeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) { logTypeWarning(colName, type); } else { - nonPrimColNames.add(colName); + primColNames.add(colName); colTypes.add(type); } } } colNames.clear(); - colNames.addAll(nonPrimColNames); + colNames.addAll(primColNames); return colTypes; }