This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new bf219503488 [fix](hive) fix write hive partition by Doris (#50864)
bf219503488 is described below
commit bf219503488b89341fde7428a80018dad5789871
Author: Socrates <[email protected]>
AuthorDate: Wed May 14 23:25:50 2025 +0800
[fix](hive) fix write hive partition by Doris (#50864)
### What problem does this PR solve?
Problem Summary:
After writing to a Hive partitioned table and adding a new partition
using Doris, writing data to the same partition using Hive results in an
error: "Partition column xxx conflicts with table columns."
---
.../docker-compose/hive/hadoop-hive-2x.env.tpl | 2 ++
.../docker-compose/hive/hadoop-hive-3x.env.tpl | 2 +-
.../docker-compose/hive/hadoop-hive.env.tpl | 1 +
.../scripts/create_preinstalled_scripts/run77.hql | 30 +++++++++++++++++++++
.../doris/datasource/hive/HMSTransaction.java | 12 +++------
.../datasource/paimon/source/PaimonScanNode.java | 2 +-
.../hive/write/test_hive_write_partitions.out | Bin 73239 -> 74043 bytes
.../hive/write/test_hive_write_partitions.groovy | 22 ++++++++++++---
8 files changed, 57 insertions(+), 14 deletions(-)
diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive-2x.env.tpl
b/docker/thirdparties/docker-compose/hive/hadoop-hive-2x.env.tpl
index 6222972176a..f622f28bc03 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive-2x.env.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive-2x.env.tpl
@@ -15,3 +15,5 @@
# limitations under the License.
#
+
+HIVE_SITE_CONF_hive_stats_column_autogather=false
diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
b/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
index 84bfce1754f..4d92bab5351 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
@@ -19,4 +19,4 @@
HIVE_SITE_CONF_hive_metastore_event_db_notification_api_auth=false
HIVE_SITE_CONF_hive_metastore_dml_events=true
HIVE_SITE_CONF_hive_metastore_transactional_event_listeners=org.apache.hive.hcatalog.listener.DbNotificationListener
-
+HIVE_SITE_CONF_hive_stats_column_autogather=false
diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
index 0e074228410..7db18ab998f 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
@@ -28,6 +28,7 @@ HIVE_SITE_CONF_hive_server2_webui_port=0
HIVE_SITE_CONF_hive_compactor_initiator_on=true
HIVE_SITE_CONF_hive_compactor_worker_threads=2
HIVE_SITE_CONF_metastore_storage_schema_reader_impl=org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader
+HIVE_SITE_CONF_hive_stats_column_autogather=false
CORE_CONF_fs_defaultFS=hdfs://${IP_HOST}:${FS_PORT}
CORE_CONF_hadoop_http_staticuser_user=root
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run77.hql
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run77.hql
new file mode 100755
index 00000000000..209981b60d3
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run77.hql
@@ -0,0 +1,30 @@
+create database if not exists write_test;
+use write_test;
+
+DROP TABLE IF EXISTS test_doris_write_hive_partition_table_original;
+CREATE TABLE test_doris_write_hive_partition_table_original (
+ `v1` decimal(3,0),
+ `v2` string )
+PARTITIONED BY (
+ `test_date` string,
+ `v3` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';
+
+INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-01', v3='project1')
+VALUES (1, 'test1'),
+ (2, 'test2');
+
+INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-01', v3='project2')
+VALUES (3, 'test3');
+
+INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-02', v3='project1')
+VALUES (4, 'test4');
+
+INSERT INTO TABLE test_doris_write_hive_partition_table_original PARTITION (test_date='2025-05-02', v3='project2')
+VALUES (5, 'test5'),
+ (6, 'test6');
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
index 9b88f7a8dea..dce7cc7cdd7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
@@ -279,7 +279,7 @@ public class HMSTransaction implements Transaction {
Maps.newHashMap(),
sd.getOutputFormat(),
sd.getSerdeInfo().getSerializationLib(),
- getTableColumns(tableInfo)
+ sd.getCols()
);
if (updateMode == TUpdateMode.OVERWRITE) {
dropPartition(tableInfo,
hivePartition.getPartitionValues(), true);
@@ -436,7 +436,7 @@ public class HMSTransaction implements Transaction {
partition.getParameters(),
sd.getOutputFormat(),
sd.getSerdeInfo().getSerializationLib(),
- getTableColumns(tableInfo)
+ sd.getCols()
);
partitionActionsForTable.put(
@@ -919,11 +919,6 @@ public class HMSTransaction implements Transaction {
throw new RuntimeException("Not Found table: " + tableInfo);
}
- public synchronized List<FieldSchema> getTableColumns(SimpleTableInfo
tableInfo) {
- return tableColumns.computeIfAbsent(tableInfo,
- key -> hiveOps.getClient().getSchema(tableInfo.getDbName(),
tableInfo.getTbName()));
- }
-
public synchronized void finishChangingExistingTable(
ActionType actionType,
SimpleTableInfo tableInfo,
@@ -1282,7 +1277,7 @@ public class HMSTransaction implements Transaction {
Maps.newHashMap(),
sd.getOutputFormat(),
sd.getSerdeInfo().getSerializationLib(),
- getTableColumns(tableInfo)
+ sd.getCols()
);
HivePartitionWithStatistics partitionWithStats =
@@ -1654,4 +1649,3 @@ public class HMSTransaction implements Transaction {
}
}
}
-
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
index 023bbe9cd4d..b5990e9f0b2 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
@@ -412,7 +412,7 @@ public class PaimonScanNode extends FileQueryScanNode {
@Override
public List<String> getPathPartitionKeys() throws DdlException,
MetaNotFoundException {
// return new ArrayList<>(source.getPaimonTable().partitionKeys());
- // Paymon is not aware of partitions and bypasses some existing logic by
+ // Paimon is not aware of partitions and bypasses some existing logic by
// returning an empty list
return new ArrayList<>();
}
diff --git
a/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out
b/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out
index 209315d1a9e..4d68899e605 100644
Binary files
a/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out
and
b/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out
differ
diff --git
a/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy
b/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy
index 7e3f070636e..bf59f5e3d55 100644
---
a/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy
+++
b/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy
@@ -189,13 +189,30 @@ suite("test_hive_write_partitions",
"p0,external,hive,external_docker,external_d
hive_docker """ DROP TABLE IF EXISTS
all_partition_types1_${format_compression}_${catalog_name}_q04; """
}
+ def test_doris_write_hive_partition_table = { String catalog_name ->
+ // After writing to a Hive partitioned table and adding a new
partition using Doris,
+ // writing data to the same partition using Hive results in an error:
"Partition column xxx conflicts with table columns."
+ String tableName = "test_doris_write_hive_partition_table"
+ String originalTableName =
"test_doris_write_hive_partition_table_original"
+ hive_docker """ drop table if exists ${tableName}; """
+ hive_docker """ create table ${tableName} like ${originalTableName};
"""
+ sql """ refresh catalog ${catalog_name};"""
+ // Insert data and add new partitions by doris
+ sql """ insert into ${tableName} select * from ${originalTableName}
where test_date between '2025-05-01' and '2025-05-02'; """
+ order_qt_test_doris_write_hive_partition_table1 """ select * from
${tableName}; """
+ // Overwrite the partition by hive, this will cause error before fix pr
+ hive_docker """ insert overwrite table ${tableName}
partition(test_date='2025-05-01',v3='project1') values(7, 'test7');"""
+ sql """ refresh catalog ${catalog_name};"""
+ order_qt_test_doris_write_hive_partition_table2 """ select * from
${tableName}; """
+ }
+
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("disable Hive test.")
return;
}
- for (String hivePrefix : ["hive3"]) {
+ for (String hivePrefix : ["hive2", "hive3"]) {
setHivePrefix(hivePrefix)
try {
String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
@@ -222,10 +239,9 @@ suite("test_hive_write_partitions",
"p0,external,hive,external_docker,external_d
q03(format_compression, catalog_name)
q04(format_compression, catalog_name)
}
+ test_doris_write_hive_partition_table(catalog_name)
sql """drop catalog if exists ${catalog_name}"""
} finally {
}
}
}
-
-
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]