This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 4e1b749765 [iceberg] Use gzip by default to iceberg avro writer (#4620)
4e1b749765 is described below
commit 4e1b74976590fbe5d885614b65e038217c9266df
Author: Jingsong Lee <[email protected]>
AuthorDate: Mon Dec 2 18:13:33 2024 +0800
[iceberg] Use gzip by default to iceberg avro writer (#4620)
---
docs/content/migration/iceberg-compatibility.md | 6 ++++++
.../java/org/apache/paimon/iceberg/IcebergOptions.java | 7 +++++++
.../paimon/iceberg/manifest/IcebergManifestFile.java | 15 +++++++++++----
.../paimon/iceberg/manifest/IcebergManifestList.java | 15 +++++++++++----
.../apache/paimon/iceberg/IcebergCompatibilityTest.java | 6 ++++++
5 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/docs/content/migration/iceberg-compatibility.md
b/docs/content/migration/iceberg-compatibility.md
index f07f78cb20..7b83936b53 100644
--- a/docs/content/migration/iceberg-compatibility.md
+++ b/docs/content/migration/iceberg-compatibility.md
@@ -371,6 +371,12 @@ you also need to set some (or all) of the following table
options when creating
<td>String</td>
<td>hadoop-conf-dir for Iceberg Hive catalog.</td>
</tr>
+ <tr>
+ <td><h5>metadata.iceberg.manifest-compression</h5></td>
+ <td style="word-wrap: break-word;">gzip</td>
+ <td>String</td>
+ <td>Compression for Iceberg manifest files.</td>
+ </tr>
</tbody>
</table>
diff --git
a/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java
b/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java
index 769ce6b161..3900233d21 100644
--- a/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java
+++ b/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java
@@ -70,6 +70,13 @@ public class IcebergOptions {
.noDefaultValue()
.withDescription("hadoop-conf-dir for Iceberg Hive catalog.");
+ public static final ConfigOption<String> MANIFEST_COMPRESSION =
+ key("metadata.iceberg.manifest-compression")
+ .stringType()
+ .defaultValue(
+ "gzip") // some Iceberg reader cannot support zstd, for example DuckDB
+ .withDescription("Compression for Iceberg manifest files.");
+
/** Where to store Iceberg metadata. */
public enum StorageType implements DescribedEnum {
DISABLED("disabled", "Disable Iceberg compatibility support."),
diff --git
a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
index d04cf3576a..57484a1f3f 100644
---
a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
+++
b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
@@ -19,6 +19,7 @@
package org.apache.paimon.iceberg.manifest;
import org.apache.paimon.CoreOptions;
+import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.format.FileFormat;
import org.apache.paimon.format.FormatReaderFactory;
import org.apache.paimon.format.FormatWriterFactory;
@@ -26,6 +27,7 @@ import org.apache.paimon.format.SimpleColStats;
import org.apache.paimon.format.SimpleStatsCollector;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
+import org.apache.paimon.iceberg.IcebergOptions;
import org.apache.paimon.iceberg.IcebergPathFactory;
import org.apache.paimon.iceberg.manifest.IcebergManifestFileMeta.Content;
import org.apache.paimon.iceberg.metadata.IcebergPartitionSpec;
@@ -82,23 +84,28 @@ public class IcebergManifestFile extends
ObjectsFile<IcebergManifestEntry> {
this.targetFileSize = targetFileSize;
}
+ @VisibleForTesting
+ public String compression() {
+ return compression;
+ }
+
public static IcebergManifestFile create(FileStoreTable table,
IcebergPathFactory pathFactory) {
RowType partitionType = table.schema().logicalPartitionType();
RowType entryType = IcebergManifestEntry.schema(partitionType);
- Options manifestFileAvroOptions = Options.fromMap(table.options());
+ Options avroOptions = Options.fromMap(table.options());
// https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/ManifestReader.java
- manifestFileAvroOptions.set(
+ avroOptions.set(
"avro.row-name-mapping",
"org.apache.paimon.avro.generated.record:manifest_entry,"
+ "manifest_entry_data_file:r2,"
+ "r2_partition:r102");
- FileFormat manifestFileAvro = FileFormat.fromIdentifier("avro",
manifestFileAvroOptions);
+ FileFormat manifestFileAvro = FileFormat.fromIdentifier("avro",
avroOptions);
return new IcebergManifestFile(
table.fileIO(),
partitionType,
manifestFileAvro.createReaderFactory(entryType),
manifestFileAvro.createWriterFactory(entryType),
- table.coreOptions().manifestCompression(),
+ avroOptions.get(IcebergOptions.MANIFEST_COMPRESSION),
pathFactory.manifestFileFactory(),
table.coreOptions().manifestTargetSize());
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestList.java
b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestList.java
index 911ebf954c..ab5cc926cd 100644
---
a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestList.java
+++
b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestList.java
@@ -18,10 +18,12 @@
package org.apache.paimon.iceberg.manifest;
+import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.format.FileFormat;
import org.apache.paimon.format.FormatReaderFactory;
import org.apache.paimon.format.FormatWriterFactory;
import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.iceberg.IcebergOptions;
import org.apache.paimon.iceberg.IcebergPathFactory;
import org.apache.paimon.options.Options;
import org.apache.paimon.table.FileStoreTable;
@@ -51,19 +53,24 @@ public class IcebergManifestList extends
ObjectsFile<IcebergManifestFileMeta> {
null);
}
+ @VisibleForTesting
+ public String compression() {
+ return compression;
+ }
+
public static IcebergManifestList create(FileStoreTable table,
IcebergPathFactory pathFactory) {
- Options manifestListAvroOptions = Options.fromMap(table.options());
+ Options avroOptions = Options.fromMap(table.options());
// https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/ManifestLists.java
- manifestListAvroOptions.set(
+ avroOptions.set(
"avro.row-name-mapping",
"org.apache.paimon.avro.generated.record:manifest_file,"
+ "manifest_file_partitions:r508");
- FileFormat manifestListAvro = FileFormat.fromIdentifier("avro",
manifestListAvroOptions);
+ FileFormat manifestListAvro = FileFormat.fromIdentifier("avro",
avroOptions);
return new IcebergManifestList(
table.fileIO(),
manifestListAvro.createReaderFactory(IcebergManifestFileMeta.schema()),
manifestListAvro.createWriterFactory(IcebergManifestFileMeta.schema()),
- table.coreOptions().manifestCompression(),
+ avroOptions.get(IcebergOptions.MANIFEST_COMPRESSION),
pathFactory.manifestListFactory());
}
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
b/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
index 9a27d56184..45cfe109b9 100644
---
a/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
@@ -30,6 +30,7 @@ import org.apache.paimon.data.Timestamp;
import org.apache.paimon.disk.IOManagerImpl;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.local.LocalFileIO;
+import org.apache.paimon.iceberg.manifest.IcebergManifestFile;
import org.apache.paimon.iceberg.manifest.IcebergManifestFileMeta;
import org.apache.paimon.iceberg.manifest.IcebergManifestList;
import org.apache.paimon.iceberg.metadata.IcebergMetadata;
@@ -302,6 +303,11 @@ public class IcebergCompatibilityTest {
IcebergPathFactory pathFactory =
new IcebergPathFactory(new Path(table.location(), "metadata"));
IcebergManifestList manifestList = IcebergManifestList.create(table,
pathFactory);
+ assertThat(manifestList.compression()).isEqualTo("gzip");
+
+ IcebergManifestFile manifestFile = IcebergManifestFile.create(table,
pathFactory);
+ assertThat(manifestFile.compression()).isEqualTo("gzip");
+
Set<String> usingManifests = new HashSet<>();
for (IcebergManifestFileMeta fileMeta :
manifestList.read(new
Path(metadata.currentSnapshot().manifestList()).getName())) {