This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new af7b2cefdac branch-4.1:[fix](iceberg)fix iceberg write special partition error. (#64225)
af7b2cefdac is described below
commit af7b2cefdacab6c25877bd0ed86514e688991e3e
Author: daidai <[email protected]>
AuthorDate: Tue Jun 9 10:07:54 2026 +0800
branch-4.1:[fix](iceberg)fix iceberg write special partition error. (#64225)
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg: https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into -->
---
.../sink/writer/iceberg/iceberg_partition_path.cpp | 52 +++++++++++++
.../sink/writer/iceberg/iceberg_partition_path.h | 33 ++++++++
.../sink/writer/iceberg/viceberg_table_writer.cpp | 4 +-
.../writer/iceberg/iceberg_partition_path_test.cpp | 40 ++++++++++
.../write/test_iceberg_write_partition_path.groovy | 88 ++++++++++++++++++++++
5 files changed, 215 insertions(+), 2 deletions(-)
diff --git a/be/src/exec/sink/writer/iceberg/iceberg_partition_path.cpp b/be/src/exec/sink/writer/iceberg/iceberg_partition_path.cpp
new file mode 100644
index 00000000000..ecde5ffba3f
--- /dev/null
+++ b/be/src/exec/sink/writer/iceberg/iceberg_partition_path.cpp
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/sink/writer/iceberg/iceberg_partition_path.h"
+
+namespace doris {
+
+namespace {
+
+bool is_unescaped_url_encoder_char(unsigned char ch) {
+ return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ('0' <= ch && ch <= '9') ||
+ ch == '.' || ch == '-' || ch == '*' || ch == '_';
+}
+
+char hex_digit(unsigned char value) {
+ return value < 10 ? static_cast<char>('0' + value) : static_cast<char>('A' + value - 10);
+}
+
+} // namespace
+
+std::string IcebergPartitionPath::escape(const std::string& path) {
+ std::string escaped;
+ escaped.reserve(path.size());
+ for (unsigned char ch : path) {
+ if (is_unescaped_url_encoder_char(ch)) {
+ escaped.push_back(static_cast<char>(ch));
+ } else if (ch == ' ') {
+ escaped.push_back('+');
+ } else {
+ escaped.push_back('%');
+ escaped.push_back(hex_digit(ch >> 4));
+ escaped.push_back(hex_digit(ch & 0x0F));
+ }
+ }
+ return escaped;
+}
+
+} // namespace doris
diff --git a/be/src/exec/sink/writer/iceberg/iceberg_partition_path.h b/be/src/exec/sink/writer/iceberg/iceberg_partition_path.h
new file mode 100644
index 00000000000..0b34493e228
--- /dev/null
+++ b/be/src/exec/sink/writer/iceberg/iceberg_partition_path.h
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+
+namespace doris {
+
+class IcebergPartitionPath {
+public:
+ // Match Iceberg Java PartitionSpec.partitionToPath, which uses URLEncoder with UTF-8.
+ static std::string escape(const std::string& path);
+
+private:
+ IcebergPartitionPath() = default;
+};
+
+} // namespace doris
diff --git a/be/src/exec/sink/writer/iceberg/viceberg_table_writer.cpp b/be/src/exec/sink/writer/iceberg/viceberg_table_writer.cpp
index c79aa3262bf..dc5f2522122 100644
--- a/be/src/exec/sink/writer/iceberg/viceberg_table_writer.cpp
+++ b/be/src/exec/sink/writer/iceberg/viceberg_table_writer.cpp
@@ -25,10 +25,10 @@
#include "core/column/column_vector.h"
#include "core/data_type/data_type_nullable.h"
#include "core/data_type_serde/data_type_serde.h"
+#include "exec/sink/writer/iceberg/iceberg_partition_path.h"
#include "exec/sink/writer/iceberg/partition_transformers.h"
#include "exec/sink/writer/iceberg/viceberg_partition_writer.h"
#include "exec/sink/writer/iceberg/viceberg_sort_writer.h"
-#include "exec/sink/writer/vhive_utils.h"
#include "exprs/vexpr.h"
#include "exprs/vexpr_context.h"
#include "format/table/iceberg/partition_spec_parser.h"
@@ -517,7 +517,7 @@ std::string VIcebergTableWriter::_partition_to_path(const doris::iceberg::Struct
}
std::string VIcebergTableWriter::_escape(const std::string& path) {
- return VHiveUtils::escape_path_name(path);
+ return IcebergPartitionPath::escape(path);
}
std::vector<std::string> VIcebergTableWriter::_partition_values(
diff --git a/be/test/exec/sink/writer/iceberg/iceberg_partition_path_test.cpp b/be/test/exec/sink/writer/iceberg/iceberg_partition_path_test.cpp
new file mode 100644
index 00000000000..fc024bfb213
--- /dev/null
+++ b/be/test/exec/sink/writer/iceberg/iceberg_partition_path_test.cpp
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/sink/writer/iceberg/iceberg_partition_path.h"
+
+#include <gtest/gtest.h>
+
+namespace doris {
+
+class IcebergPartitionPathTest : public testing::Test {};
+
+TEST_F(IcebergPartitionPathTest, test_escape_matches_iceberg_partition_spec_path_encoding) {
+ EXPECT_EQ("", IcebergPartitionPath::escape(""));
+ EXPECT_EQ("abcXYZ012.-*_", IcebergPartitionPath::escape("abcXYZ012.-*_"));
+ EXPECT_EQ("with+space", IcebergPartitionPath::escape("with space"));
+ EXPECT_EQ("slash%2Fcolon%3Aequals%3Dpercent%25question%3F",
+ IcebergPartitionPath::escape("slash/colon:equals=percent%question?"));
+ EXPECT_EQ("quote%22hash%23brackets%5B%5Dcaret%5E",
+ IcebergPartitionPath::escape("quote\"hash#brackets[]caret^"));
+ EXPECT_EQ("tilde%7Ebang%21plus%2B", IcebergPartitionPath::escape("tilde~bang!plus+"));
+ EXPECT_EQ(
+ "with%CC%81combining+character",
+ IcebergPartitionPath::escape(std::string("with") + "\xCC\x81" + "combining character"));
+}
+
+} // namespace doris
diff --git a/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_partition_path.groovy b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_partition_path.groovy
new file mode 100644
index 00000000000..5a1946e20aa
--- /dev/null
+++ b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_partition_path.groovy
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iceberg_write_partition_path", "p0,external,iceberg,external_docker,external_docker_iceberg") {
+ String enabled = context.config.otherConfigs.get("enableIcebergTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable iceberg test.")
+ return
+ }
+
+ String catalog_name = "test_iceberg_write_partition_path"
+ String db_name = "test_partition_path_db"
+ String table_name = "test_partition_path_tbl"
+ String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port")
+ String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ "type" = "iceberg",
+ "iceberg.catalog.type" = "rest",
+ "uri" = "http://${externalEnvIp}:${rest_port}",
+ "s3.access_key" = "admin",
+ "s3.secret_key" = "password",
+ "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+ "s3.region" = "us-east-1"
+ )
+ """
+
+ try {
+ sql """switch ${catalog_name}"""
+ sql """drop database if exists ${db_name} force"""
+ sql """create database ${db_name}"""
+ sql """use ${db_name}"""
+
+ sql """drop table if exists ${table_name}"""
+ sql """
+ create table ${table_name} (
+ id bigint,
+ part_col string
+ ) engine=iceberg
+ partition by list (part_col) ()
+ properties (
+ "format-version" = "2",
+ "write-format" = "parquet",
+ "write.format.default" = "parquet"
+ )
+ """
+
+ sql """
+ insert into ${table_name} values
+ (1, concat('with', unhex('CC81'), 'combining character')),
+ (2, 'slash/colon:equals=percent%question?')
+ """
+
+ List<List<Object>> rows = sql """select id, hex(part_col) from ${table_name} order by id"""
+ assertEquals(2, rows.size())
+ assertEquals("1", rows[0][0].toString())
+ assertEquals("77697468CC81636F6D62696E696E6720636861726163746572", rows[0][1].toString())
+ assertEquals("2", rows[1][0].toString())
+ assertEquals("736C6173682F636F6C6F6E3A657175616C733D70657263656E74257175657374696F6E3F", rows[1][1].toString())
+
+ List<String> filePaths = sql("""select file_path from ${table_name}\$files order by file_path""")
+ .collect { row -> row[0].toString() }
+ logger.info("Iceberg partition file paths: ${filePaths}")
+ assertTrue(filePaths.any { path -> path.contains("part_col=with%CC%81combining+character") },
+ "Expected Iceberg URL-encoded UTF-8 partition path, actual paths: ${filePaths}")
+ assertTrue(filePaths.any { path -> path.contains("part_col=slash%2Fcolon%3Aequals%3Dpercent%25question%3F") },
+ "Expected Iceberg URL-encoded special-character partition path, actual paths: ${filePaths}")
+ } finally {
+ sql """drop database if exists ${catalog_name}.${db_name} force"""
+ sql """drop catalog if exists ${catalog_name}"""
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]