This is an automated email from the ASF dual-hosted git repository.
zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2366edffcc6 [bug](iceberg) fix can't get migrated Iceberg tables
format type (#64134)
2366edffcc6 is described below
commit 2366edffcc638ef8ca2a49ca8694763bc6453b54
Author: zhangstar333 <[email protected]>
AuthorDate: Tue Jun 9 14:13:27 2026 +0800
[bug](iceberg) fix can't get migrated Iceberg tables format type (#64134)
### What problem does this PR solve?
Problem Summary:
```
[CORRUPTION]Invalid magic number in parquet file, bytes read: 253, file
size: 253,
path: /user/hive/warehouse/test_migrate_managed_...,
read magic: ORC .
```
The properties of a migrated Iceberg table contain neither "write-format" nor
"write.format.default",
so Doris falls back to Parquet as the table format, even though the data files
are actually ORC.
This change adds a further check that infers the table format from the table's data files.
---
.../create_preinstalled_scripts/iceberg/run10.sql | 4 +--
.../doris/datasource/iceberg/IcebergUtils.java | 40 ++++++++++++++++++----
2 files changed, 36 insertions(+), 8 deletions(-)
diff --git
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run10.sql
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run10.sql
index 0d2b2240de4..650ffdec575 100644
---
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run10.sql
+++
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run10.sql
@@ -8,7 +8,7 @@ CREATE TABLE sc_drop_add_orc (
)
USING iceberg
PARTITIONED BY (id)
-TBLPROPERTIES ('format'='orc');
+TBLPROPERTIES ('write.format.default' = 'orc');
INSERT INTO sc_drop_add_orc VALUES (1, 'Alice', 25);
INSERT INTO sc_drop_add_orc VALUES (2, 'Bob', 30);
@@ -32,7 +32,7 @@ CREATE TABLE sc_drop_add_parquet (
)
USING iceberg
PARTITIONED BY (id)
-TBLPROPERTIES ('format'='parquet');
+TBLPROPERTIES ('write.format.default' = 'parquet');
INSERT INTO sc_drop_add_parquet VALUES (1, 'Alice', 25);
INSERT INTO sc_drop_add_parquet VALUES (2, 'Bob', 30);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 7a3be773cf0..932bb8b86da 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -1141,12 +1141,7 @@ public class IcebergUtils {
public static FileFormat getFileFormat(Table icebergTable) {
Map<String, String> properties = icebergTable.properties();
- String fileFormatName;
- if (properties.containsKey(WRITE_FORMAT)) {
- fileFormatName = properties.get(WRITE_FORMAT);
- } else {
- fileFormatName =
properties.getOrDefault(TableProperties.DEFAULT_FILE_FORMAT, PARQUET_NAME);
- }
+ String fileFormatName = resolveFileFormatName(icebergTable,
properties);
FileFormat fileFormat;
if (fileFormatName.toLowerCase().contains(ORC_NAME)) {
fileFormat = FileFormat.ORC;
@@ -1158,6 +1153,39 @@ public class IcebergUtils {
return fileFormat;
}
+ private static String resolveFileFormatName(Table icebergTable,
Map<String, String> properties) {
+ // 1. Check "write-format" (nickname in Flink and Spark)
+ if (properties.containsKey(WRITE_FORMAT)) {
+ return properties.get(WRITE_FORMAT);
+ }
+ // 2. Check "write.format.default" (standard Iceberg property)
+ if (properties.containsKey(TableProperties.DEFAULT_FILE_FORMAT)) {
+ return properties.get(TableProperties.DEFAULT_FILE_FORMAT);
+ }
+ // 3. Last resort: infer from the actual data files in the current
snapshot.
+ // This handles migrated tables where none of the above properties
are set.
+ return inferFileFormatFromDataFiles(icebergTable);
+ }
+
+ private static String inferFileFormatFromDataFiles(Table icebergTable) {
+ if (icebergTable.currentSnapshot() == null) {
+ LOG.info("Iceberg table {} has no snapshot, defaulting to {}",
icebergTable.name(), PARQUET_NAME);
+ return PARQUET_NAME;
+ }
+ try (CloseableIterable<FileScanTask> files =
icebergTable.newScan().planFiles()) {
+ java.util.Iterator<FileScanTask> it = files.iterator();
+ if (it.hasNext()) {
+ String format = it.next().file().format().name().toLowerCase();
+ LOG.info("Iceberg table {} inferred file format {} from data
files", icebergTable.name(), format);
+ return format;
+ }
+ } catch (Exception e) {
+ LOG.warn("Failed to infer file format from data files for table
{}, defaulting to {}",
+ icebergTable.name(), PARQUET_NAME, e);
+ }
+ return PARQUET_NAME;
+ }
+
public static String getFileCompress(Table table) {
Map<String, String> properties = table.properties();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]