This is an automated email from the ASF dual-hosted git repository.

soumyakantidas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 639aa03ee5c HIVE-29474: DESCRIBE FORMATTED for columns produces wrong output when the column datatype is STRUCT (#6333)
639aa03ee5c is described below

commit 639aa03ee5c3be7ace7fedf9b8fb61d5d2d979d7
Author: Tanishq Chugh <[email protected]>
AuthorDate: Thu Mar 5 22:59:08 2026 +0530

    HIVE-29474: DESCRIBE FORMATTED for columns produces wrong output when the column datatype is STRUCT (#6333)
---
 .../ql/ddl/table/info/desc/DescTableOperation.java |  36 +++-
 .../queries/clientpositive/desc_cols_formatted.q   |  19 ++
 .../clientpositive/llap/compustat_avro.q.out       |   4 +-
 .../clientpositive/llap/desc_cols_formatted.q.out  | 199 +++++++++++++++++++++
 .../llap/parquet_vectorization_part.q.out          |  28 +--
 5 files changed, 261 insertions(+), 25 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java
index 6401f9a8d9b..32a1dfbd2b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java
@@ -200,18 +200,19 @@ private void getColumnDataColPathSpecified(Table table, Partition part, List<Fie
         if (table.isPartitionKey(colNames.get(0))) {
           getColumnDataForPartitionKeyColumn(table, cols, colStats, colNames, 
tableProps);
         } else {
-          getColumnsForNotPartitionKeyColumn(table, cols, colStats, 
deserializer, colNames, tableProps);
+          getColumnsForNotPartitionKeyColumn(table, cols, colStats, 
deserializer, colName,
+              tableProps);
         }
         table.setParameters(tableProps);
       } else {
-        cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), 
deserializer, context.getConf()));
+        cols.addAll(getFilteredFieldsFromDeserializer(table, deserializer, 
colName));
         colStats.addAll(context.getDb().getTableColumnStatistics(table, 
colNames, false));
       }
     } else {
       List<String> partitions = new ArrayList<>();
       String partName = part.getName();
       partitions.add(partName);
-      cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), 
deserializer, context.getConf()));
+      cols.addAll(getFilteredFieldsFromDeserializer(table, deserializer, 
colName));
       Map<String, List<ColumnStatisticsObj>> partitionColumnStatistics = 
context.getDb().getPartitionColumnStatistics(
           table.getDbName(), table.getTableName(), partitions, colNames, 
false);
       List<ColumnStatisticsObj> partitionColStat = 
partitionColumnStatistics.get(partName);
@@ -221,6 +222,23 @@ private void getColumnDataColPathSpecified(Table table, 
Partition part, List<Fie
     }
   }
 
+  private List<FieldSchema> getFilteredFieldsFromDeserializer(Table table, Deserializer deserializer,
+      String targetColName) throws HiveException {
+    List<FieldSchema> allFields = Hive.getFieldsFromDeserializer(table.getTableName(), deserializer, context.getConf());
+    List<FieldSchema> filteredFields = new ArrayList<>();
+    
+    for (FieldSchema field : allFields) {
+      if (field.getName() != null && targetColName.equalsIgnoreCase(field.getName())) {
+        // The ObjectInspector normalizes column names to lowercase.
+        // To ensure the column name casing is same as in query, setting it back.
+        field.setName(targetColName);
+        filteredFields.add(field);
+      }
+    }
+    
+    return filteredFields;
+  }
+
   private void getColumnDataForPartitionKeyColumn(Table table, 
List<FieldSchema> cols,
       List<ColumnStatisticsObj> colStats, List<String> colNames, Map<String, 
String> tableProps)
       throws HiveException, MetaException {
@@ -239,18 +257,18 @@ private void getColumnDataForPartitionKeyColumn(Table 
table, List<FieldSchema> c
   }
 
   private void getColumnsForNotPartitionKeyColumn(Table table, 
List<FieldSchema> cols, List<ColumnStatisticsObj> colStats,
-      Deserializer deserializer, List<String> colNames, Map<String, String> 
tableProps)
+      Deserializer deserializer, String colName, Map<String, String> 
tableProps)
       throws HiveException {
-    cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), 
deserializer, context.getConf()));
+    cols.addAll(getFilteredFieldsFromDeserializer(table, deserializer, 
colName));
     List<String> parts = context.getDb().getPartitionNames(table, (short) -1);
-    
-    AggrStats aggrStats = context.getDb().getAggrColStatsFor(table, colNames, 
parts, false);
+    AggrStats aggrStats = context.getDb().getAggrColStatsFor(table, 
Lists.newArrayList(colName.toLowerCase()),
+        parts, false);
     colStats.addAll(aggrStats.getColStats());
     
     if (parts.size() == aggrStats.getPartsFound()) {
-      StatsSetupConst.setColumnStatsState(tableProps, colNames);
+      StatsSetupConst.setColumnStatsState(tableProps, 
Lists.newArrayList(colName.toLowerCase()));
     } else {
-      StatsSetupConst.removeColumnStatsState(tableProps, colNames);
+      StatsSetupConst.removeColumnStatsState(tableProps, 
Lists.newArrayList(colName.toLowerCase()));
     }
   }
 
diff --git a/ql/src/test/queries/clientpositive/desc_cols_formatted.q b/ql/src/test/queries/clientpositive/desc_cols_formatted.q
new file mode 100644
index 00000000000..bcdc4b66cc6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/desc_cols_formatted.q
@@ -0,0 +1,19 @@
+CREATE TABLE tbl_t (id int, Point STRUCT<x:INT, y:INT>);
+
+DESCRIBE FORMATTED tbl_t;
+
+DESCRIBE FORMATTED tbl_t id;
+DESCRIBE FORMATTED tbl_t Point;
+
+DESCRIBE tbl_t id;
+DESCRIBE tbl_t Point;
+
+CREATE TABLE tbl_part(id int, Point STRUCT<x:INT, y:INT>) PARTITIONED BY (name 
string);
+
+DESCRIBE FORMATTED tbl_part;
+
+DESCRIBE FORMATTED tbl_part id;
+DESCRIBE FORMATTED tbl_part Point;
+
+DESCRIBE tbl_part id;
+DESCRIBE tbl_part Point;
diff --git a/ql/src/test/results/clientpositive/llap/compustat_avro.q.out 
b/ql/src/test/results/clientpositive/llap/compustat_avro.q.out
index 3d80d716c60..312115e03af 100644
--- a/ql/src/test/results/clientpositive/llap/compustat_avro.q.out
+++ b/ql/src/test/results/clientpositive/llap/compustat_avro.q.out
@@ -43,7 +43,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector                                 
-comment                from deserializer   
+comment                                    
 COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}}
 PREHOOK: query: analyze table testAvro compute statistics for columns col1,col3
 PREHOOK: type: ANALYZE_TABLE
@@ -72,6 +72,6 @@ max_col_len           0
 num_trues                                  
 num_falses                                 
 bit_vector                                 
-comment                from deserializer   
+comment                                    
 COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}}
 #### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/llap/desc_cols_formatted.q.out 
b/ql/src/test/results/clientpositive/llap/desc_cols_formatted.q.out
new file mode 100644
index 00000000000..375576b1479
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/desc_cols_formatted.q.out
@@ -0,0 +1,199 @@
+PREHOOK: query: CREATE TABLE tbl_t (id int, Point STRUCT<x:INT, y:INT>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_t
+POSTHOOK: query: CREATE TABLE tbl_t (id int, Point STRUCT<x:INT, y:INT>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_t
+PREHOOK: query: DESCRIBE FORMATTED tbl_t
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_t
+POSTHOOK: query: DESCRIBE FORMATTED tbl_t
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_t
+# col_name             data_type               comment             
+id                     int                                         
+point                  struct<x:int,y:int>                         
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+#### A masked pattern was here ####
+Table Type:            MANAGED_TABLE            
+Table Parameters:               
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"point\":\"true\"}}
+       bucketing_version       2                   
+       numFiles                0                   
+       numRows                 0                   
+       rawDataSize             0                   
+       totalSize               #Masked#
+#### A masked pattern was here ####
+                
+# Storage Information           
+SerDe Library:         org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe      
 
+InputFormat:           org.apache.hadoop.mapred.TextInputFormat         
+OutputFormat:          
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat       
+Compressed:            No                       
+Num Buckets:           -1                       
+Bucket Columns:        []                       
+Sort Columns:          []                       
+Storage Desc Params:            
+       serialization.format    1                   
+PREHOOK: query: DESCRIBE FORMATTED tbl_t id
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_t
+POSTHOOK: query: DESCRIBE FORMATTED tbl_t id
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_t
+col_name               id                  
+data_type              int                 
+min                                        
+max                                        
+num_nulls                                  
+distinct_count                             
+avg_col_len                                
+max_col_len                                
+num_trues                                  
+num_falses                                 
+bit_vector                                 
+comment                from deserializer   
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"point\":\"true\"}}
+PREHOOK: query: DESCRIBE FORMATTED tbl_t Point
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_t
+POSTHOOK: query: DESCRIBE FORMATTED tbl_t Point
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_t
+col_name               Point               
+data_type              struct<x:int,y:int> 
+min                                        
+max                                        
+num_nulls                                  
+distinct_count                             
+avg_col_len                                
+max_col_len                                
+num_trues                                  
+num_falses                                 
+bit_vector                                 
+comment                from deserializer   
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"point\":\"true\"}}
+PREHOOK: query: DESCRIBE tbl_t id
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_t
+POSTHOOK: query: DESCRIBE tbl_t id
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_t
+id                     int                     from deserializer   
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"point\":\"true\"}}
         
+PREHOOK: query: DESCRIBE tbl_t Point
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_t
+POSTHOOK: query: DESCRIBE tbl_t Point
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_t
+x                      int                     from deserializer   
+y                      int                     from deserializer   
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"point\":\"true\"}}
         
+PREHOOK: query: CREATE TABLE tbl_part(id int, Point STRUCT<x:INT, y:INT>) 
PARTITIONED BY (name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_part
+POSTHOOK: query: CREATE TABLE tbl_part(id int, Point STRUCT<x:INT, y:INT>) 
PARTITIONED BY (name string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_part
+PREHOOK: query: DESCRIBE FORMATTED tbl_part
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_part
+POSTHOOK: query: DESCRIBE FORMATTED tbl_part
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_part
+# col_name             data_type               comment             
+id                     int                                         
+point                  struct<x:int,y:int>                         
+                
+# Partition Information                 
+# col_name             data_type               comment             
+name                   string                                      
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+#### A masked pattern was here ####
+Table Type:            MANAGED_TABLE            
+Table Parameters:               
+       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       bucketing_version       2                   
+       numFiles                0                   
+       numPartitions           0                   
+       numRows                 0                   
+       rawDataSize             0                   
+       totalSize               #Masked#
+#### A masked pattern was here ####
+                
+# Storage Information           
+SerDe Library:         org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe      
 
+InputFormat:           org.apache.hadoop.mapred.TextInputFormat         
+OutputFormat:          
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat       
+Compressed:            No                       
+Num Buckets:           -1                       
+Bucket Columns:        []                       
+Sort Columns:          []                       
+Storage Desc Params:            
+       serialization.format    1                   
+PREHOOK: query: DESCRIBE FORMATTED tbl_part id
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_part
+POSTHOOK: query: DESCRIBE FORMATTED tbl_part id
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_part
+col_name               id                  
+data_type              int                 
+min                                        
+max                                        
+num_nulls                                  
+distinct_count                             
+avg_col_len                                
+max_col_len                                
+num_trues                                  
+num_falses                                 
+bit_vector                                 
+comment                from deserializer   
+COLUMN_STATS_ACCURATE  {\"COLUMN_STATS\":{\"id\":\"true\"}}
+PREHOOK: query: DESCRIBE FORMATTED tbl_part Point
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_part
+POSTHOOK: query: DESCRIBE FORMATTED tbl_part Point
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_part
+col_name               Point               
+data_type              struct<x:int,y:int> 
+min                                        
+max                                        
+num_nulls                                  
+distinct_count                             
+avg_col_len                                
+max_col_len                                
+num_trues                                  
+num_falses                                 
+bit_vector                                 
+comment                from deserializer   
+COLUMN_STATS_ACCURATE  {\"COLUMN_STATS\":{\"point\":\"true\"}}
+PREHOOK: query: DESCRIBE tbl_part id
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_part
+POSTHOOK: query: DESCRIBE tbl_part id
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_part
+id                     int                     from deserializer   
+PREHOOK: query: DESCRIBE tbl_part Point
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_part
+POSTHOOK: query: DESCRIBE tbl_part Point
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_part
+x                      int                     from deserializer   
+y                      int                     from deserializer   
diff --git 
a/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out 
b/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out
index b1bd3bc5367..4344f933881 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out
@@ -105,7 +105,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011') 
csmallint
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -123,7 +123,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011') 
cfloat
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -141,7 +141,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011') 
cdouble
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -159,7 +159,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011') 
cstring1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -177,7 +177,7 @@ max_col_len                 16
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011') 
ctimestamp1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -195,7 +195,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011') 
cboolean1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -213,7 +213,7 @@ max_col_len
 num_trues              100                 
 num_falses             0                   
 bit_vector                                 
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012') 
ctinyint
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -231,7 +231,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012') 
csmallint
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -249,7 +249,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012') 
cfloat
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -267,7 +267,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012') 
cdouble
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -285,7 +285,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012') 
cstring1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -303,7 +303,7 @@ max_col_len                 16
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012') 
ctimestamp1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -321,7 +321,7 @@ max_col_len
 num_trues                                  
 num_falses                                 
 bit_vector             HL                  
-comment                from deserializer   
+comment                                    
 PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012') 
cboolean1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@alltypesparquet_part
@@ -339,4 +339,4 @@ max_col_len
 num_trues              100                 
 num_falses             0                   
 bit_vector                                 
-comment                from deserializer   
+comment                                    

Reply via email to