This is an automated email from the ASF dual-hosted git repository.
soumyakantidas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 639aa03ee5c HIVE-29474: DESCRIBE FORMATTED for columns produces wrong
output when the column datatype is STRUCT (#6333)
639aa03ee5c is described below
commit 639aa03ee5c3be7ace7fedf9b8fb61d5d2d979d7
Author: Tanishq Chugh <[email protected]>
AuthorDate: Thu Mar 5 22:59:08 2026 +0530
HIVE-29474: DESCRIBE FORMATTED for columns produces wrong output when the
column datatype is STRUCT (#6333)
---
.../ql/ddl/table/info/desc/DescTableOperation.java | 36 +++-
.../queries/clientpositive/desc_cols_formatted.q | 19 ++
.../clientpositive/llap/compustat_avro.q.out | 4 +-
.../clientpositive/llap/desc_cols_formatted.q.out | 199 +++++++++++++++++++++
.../llap/parquet_vectorization_part.q.out | 28 +--
5 files changed, 261 insertions(+), 25 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java
index 6401f9a8d9b..32a1dfbd2b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java
@@ -200,18 +200,19 @@ private void getColumnDataColPathSpecified(Table table, Partition part, List<Fie
if (table.isPartitionKey(colNames.get(0))) {
getColumnDataForPartitionKeyColumn(table, cols, colStats, colNames,
tableProps);
} else {
- getColumnsForNotPartitionKeyColumn(table, cols, colStats,
deserializer, colNames, tableProps);
+ getColumnsForNotPartitionKeyColumn(table, cols, colStats,
deserializer, colName,
+ tableProps);
}
table.setParameters(tableProps);
} else {
- cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(),
deserializer, context.getConf()));
+ cols.addAll(getFilteredFieldsFromDeserializer(table, deserializer,
colName));
colStats.addAll(context.getDb().getTableColumnStatistics(table,
colNames, false));
}
} else {
List<String> partitions = new ArrayList<>();
String partName = part.getName();
partitions.add(partName);
- cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(),
deserializer, context.getConf()));
+ cols.addAll(getFilteredFieldsFromDeserializer(table, deserializer,
colName));
Map<String, List<ColumnStatisticsObj>> partitionColumnStatistics =
context.getDb().getPartitionColumnStatistics(
table.getDbName(), table.getTableName(), partitions, colNames,
false);
List<ColumnStatisticsObj> partitionColStat =
partitionColumnStatistics.get(partName);
@@ -221,6 +222,23 @@ private void getColumnDataColPathSpecified(Table table, Partition part, List<Fie
}
}
+ private List<FieldSchema> getFilteredFieldsFromDeserializer(Table table,
Deserializer deserializer,
+ String targetColName) throws HiveException {
+ List<FieldSchema> allFields =
Hive.getFieldsFromDeserializer(table.getTableName(), deserializer,
context.getConf());
+ List<FieldSchema> filteredFields = new ArrayList<>();
+
+ for (FieldSchema field : allFields) {
+ if (field.getName() != null &&
targetColName.equalsIgnoreCase(field.getName())) {
+ // The ObjectInspector normalizes column names to lowercase.
+ // To ensure the column name casing is same as in query, setting it
back.
+ field.setName(targetColName);
+ filteredFields.add(field);
+ }
+ }
+
+ return filteredFields;
+ }
+
private void getColumnDataForPartitionKeyColumn(Table table,
List<FieldSchema> cols,
List<ColumnStatisticsObj> colStats, List<String> colNames, Map<String,
String> tableProps)
throws HiveException, MetaException {
@@ -239,18 +257,18 @@ private void getColumnDataForPartitionKeyColumn(Table table, List<FieldSchema> c
}
private void getColumnsForNotPartitionKeyColumn(Table table,
List<FieldSchema> cols, List<ColumnStatisticsObj> colStats,
- Deserializer deserializer, List<String> colNames, Map<String, String>
tableProps)
+ Deserializer deserializer, String colName, Map<String, String>
tableProps)
throws HiveException {
- cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(),
deserializer, context.getConf()));
+ cols.addAll(getFilteredFieldsFromDeserializer(table, deserializer,
colName));
List<String> parts = context.getDb().getPartitionNames(table, (short) -1);
-
- AggrStats aggrStats = context.getDb().getAggrColStatsFor(table, colNames,
parts, false);
+ AggrStats aggrStats = context.getDb().getAggrColStatsFor(table,
Lists.newArrayList(colName.toLowerCase()),
+ parts, false);
colStats.addAll(aggrStats.getColStats());
if (parts.size() == aggrStats.getPartsFound()) {
- StatsSetupConst.setColumnStatsState(tableProps, colNames);
+ StatsSetupConst.setColumnStatsState(tableProps,
Lists.newArrayList(colName.toLowerCase()));
} else {
- StatsSetupConst.removeColumnStatsState(tableProps, colNames);
+ StatsSetupConst.removeColumnStatsState(tableProps,
Lists.newArrayList(colName.toLowerCase()));
}
}
diff --git a/ql/src/test/queries/clientpositive/desc_cols_formatted.q b/ql/src/test/queries/clientpositive/desc_cols_formatted.q
new file mode 100644
index 00000000000..bcdc4b66cc6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/desc_cols_formatted.q
@@ -0,0 +1,19 @@
+CREATE TABLE tbl_t (id int, Point STRUCT<x:INT, y:INT>);
+
+DESCRIBE FORMATTED tbl_t;
+
+DESCRIBE FORMATTED tbl_t id;
+DESCRIBE FORMATTED tbl_t Point;
+
+DESCRIBE tbl_t id;
+DESCRIBE tbl_t Point;
+
+CREATE TABLE tbl_part(id int, Point STRUCT<x:INT, y:INT>) PARTITIONED BY (name
string);
+
+DESCRIBE FORMATTED tbl_part;
+
+DESCRIBE FORMATTED tbl_part id;
+DESCRIBE FORMATTED tbl_part Point;
+
+DESCRIBE tbl_part id;
+DESCRIBE tbl_part Point;
diff --git a/ql/src/test/results/clientpositive/llap/compustat_avro.q.out b/ql/src/test/results/clientpositive/llap/compustat_avro.q.out
index 3d80d716c60..312115e03af 100644
--- a/ql/src/test/results/clientpositive/llap/compustat_avro.q.out
+++ b/ql/src/test/results/clientpositive/llap/compustat_avro.q.out
@@ -43,7 +43,7 @@ max_col_len
num_trues
num_falses
bit_vector
-comment from deserializer
+comment
COLUMN_STATS_ACCURATE
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}}
PREHOOK: query: analyze table testAvro compute statistics for columns col1,col3
PREHOOK: type: ANALYZE_TABLE
@@ -72,6 +72,6 @@ max_col_len 0
num_trues
num_falses
bit_vector
-comment from deserializer
+comment
COLUMN_STATS_ACCURATE
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\",\"col6\":\"true\"}}
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/llap/desc_cols_formatted.q.out b/ql/src/test/results/clientpositive/llap/desc_cols_formatted.q.out
new file mode 100644
index 00000000000..375576b1479
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/desc_cols_formatted.q.out
@@ -0,0 +1,199 @@
+PREHOOK: query: CREATE TABLE tbl_t (id int, Point STRUCT<x:INT, y:INT>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_t
+POSTHOOK: query: CREATE TABLE tbl_t (id int, Point STRUCT<x:INT, y:INT>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_t
+PREHOOK: query: DESCRIBE FORMATTED tbl_t
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_t
+POSTHOOK: query: DESCRIBE FORMATTED tbl_t
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_t
+# col_name data_type comment
+id int
+point struct<x:int,y:int>
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"point\":\"true\"}}
+ bucketing_version 2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ totalSize #Masked#
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED tbl_t id
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_t
+POSTHOOK: query: DESCRIBE FORMATTED tbl_t id
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_t
+col_name id
+data_type int
+min
+max
+num_nulls
+distinct_count
+avg_col_len
+max_col_len
+num_trues
+num_falses
+bit_vector
+comment from deserializer
+COLUMN_STATS_ACCURATE
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"point\":\"true\"}}
+PREHOOK: query: DESCRIBE FORMATTED tbl_t Point
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_t
+POSTHOOK: query: DESCRIBE FORMATTED tbl_t Point
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_t
+col_name Point
+data_type struct<x:int,y:int>
+min
+max
+num_nulls
+distinct_count
+avg_col_len
+max_col_len
+num_trues
+num_falses
+bit_vector
+comment from deserializer
+COLUMN_STATS_ACCURATE
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"point\":\"true\"}}
+PREHOOK: query: DESCRIBE tbl_t id
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_t
+POSTHOOK: query: DESCRIBE tbl_t id
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_t
+id int from deserializer
+COLUMN_STATS_ACCURATE
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"point\":\"true\"}}
+PREHOOK: query: DESCRIBE tbl_t Point
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_t
+POSTHOOK: query: DESCRIBE tbl_t Point
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_t
+x int from deserializer
+y int from deserializer
+COLUMN_STATS_ACCURATE
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"point\":\"true\"}}
+PREHOOK: query: CREATE TABLE tbl_part(id int, Point STRUCT<x:INT, y:INT>)
PARTITIONED BY (name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_part
+POSTHOOK: query: CREATE TABLE tbl_part(id int, Point STRUCT<x:INT, y:INT>)
PARTITIONED BY (name string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_part
+PREHOOK: query: DESCRIBE FORMATTED tbl_part
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_part
+POSTHOOK: query: DESCRIBE FORMATTED tbl_part
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_part
+# col_name data_type comment
+id int
+point struct<x:int,y:int>
+
+# Partition Information
+# col_name data_type comment
+name string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ bucketing_version 2
+ numFiles 0
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ totalSize #Masked#
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED tbl_part id
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_part
+POSTHOOK: query: DESCRIBE FORMATTED tbl_part id
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_part
+col_name id
+data_type int
+min
+max
+num_nulls
+distinct_count
+avg_col_len
+max_col_len
+num_trues
+num_falses
+bit_vector
+comment from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"id\":\"true\"}}
+PREHOOK: query: DESCRIBE FORMATTED tbl_part Point
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_part
+POSTHOOK: query: DESCRIBE FORMATTED tbl_part Point
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_part
+col_name Point
+data_type struct<x:int,y:int>
+min
+max
+num_nulls
+distinct_count
+avg_col_len
+max_col_len
+num_trues
+num_falses
+bit_vector
+comment from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"point\":\"true\"}}
+PREHOOK: query: DESCRIBE tbl_part id
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_part
+POSTHOOK: query: DESCRIBE tbl_part id
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_part
+id int from deserializer
+PREHOOK: query: DESCRIBE tbl_part Point
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_part
+POSTHOOK: query: DESCRIBE tbl_part Point
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_part
+x int from deserializer
+y int from deserializer
diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out
index b1bd3bc5367..4344f933881 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out
@@ -105,7 +105,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011')
csmallint
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -123,7 +123,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011')
cfloat
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -141,7 +141,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011')
cdouble
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -159,7 +159,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011')
cstring1
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -177,7 +177,7 @@ max_col_len 16
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011')
ctimestamp1
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -195,7 +195,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2011')
cboolean1
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -213,7 +213,7 @@ max_col_len
num_trues 100
num_falses 0
bit_vector
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012')
ctinyint
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -231,7 +231,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012')
csmallint
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -249,7 +249,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012')
cfloat
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -267,7 +267,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012')
cdouble
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -285,7 +285,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012')
cstring1
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -303,7 +303,7 @@ max_col_len 16
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012')
ctimestamp1
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -321,7 +321,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
-comment from deserializer
+comment
PREHOOK: query: describe formatted alltypesparquet_part PARTITION(ds='2012')
cboolean1
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@alltypesparquet_part
@@ -339,4 +339,4 @@ max_col_len
num_trues 100
num_falses 0
bit_vector
-comment from deserializer
+comment