This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 66ca6984ba [core] Introduce manifest.delete-file-drop-stats (#4640)
66ca6984ba is described below
commit 66ca6984bac38a835bc42175ef8e639841b15476
Author: Jingsong Lee <[email protected]>
AuthorDate: Wed Dec 4 21:14:18 2024 +0800
[core] Introduce manifest.delete-file-drop-stats (#4640)
---
docs/layouts/shortcodes/generated/core_configuration.html | 6 ++++++
.../src/main/java/org/apache/paimon/CoreOptions.java | 12 ++++++++++++
.../src/main/java/org/apache/paimon/AbstractFileStore.java | 1 +
.../append/UnawareAppendTableCompactionCoordinator.java | 4 +++-
.../org/apache/paimon/operation/AbstractFileStoreWrite.java | 9 ++++++++-
.../org/apache/paimon/operation/FileStoreCommitImpl.java | 7 ++++++-
.../org/apache/paimon/operation/MemoryFileStoreWrite.java | 1 +
.../org/apache/paimon/operation/FileStoreCommitTest.java | 1 +
.../org/apache/paimon/spark/commands/PaimonCommand.scala | 5 ++++-
9 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/docs/layouts/shortcodes/generated/core_configuration.html
b/docs/layouts/shortcodes/generated/core_configuration.html
index 2ad5db28b9..6fb2c72650 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -453,6 +453,12 @@ Mainly to resolve data skew on primary keys. We recommend
starting with 64 mb wh
<td>String</td>
<td>Default file compression for manifest.</td>
</tr>
+ <tr>
+ <td><h5>manifest.delete-file-drop-stats</h5></td>
+ <td style="word-wrap: break-word;">false</td>
+ <td>Boolean</td>
+ <td>For DELETE manifest entry in manifest file, drop stats to reduce memory and storage. Default value is false only for compatibility of old reader.</td>
+ </tr>
<tr>
<td><h5>manifest.format</h5></td>
<td style="word-wrap: break-word;">"avro"</td>
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
index cddef33c27..765d5a1e32 100644
--- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
@@ -1426,6 +1426,14 @@ public class CoreOptions implements Serializable {
.noDefaultValue()
.withDescription("The object location for object table.");
+ public static final ConfigOption<Boolean> MANIFEST_DELETE_FILE_DROP_STATS =
+ key("manifest.delete-file-drop-stats")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription(
+ "For DELETE manifest entry in manifest file, drop stats to reduce memory and storage."
+ + " Default value is false only for compatibility of old reader.");
+
@ExcludeFromDocumentation("Only used internally to support materialized table")
public static final ConfigOption<String>
MATERIALIZED_TABLE_DEFINITION_QUERY =
key("materialized-table.definition-query")
@@ -1947,6 +1955,10 @@ public class CoreOptions implements Serializable {
return lookupStrategy().needLookup;
}
+ public boolean manifestDeleteFileDropStats() {
+ return options.get(MANIFEST_DELETE_FILE_DROP_STATS);
+ }
+
public LookupStrategy lookupStrategy() {
return LookupStrategy.from(
mergeEngine().equals(MergeEngine.FIRST_ROW),
diff --git a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
index ae4552aa71..1a538ad89e 100644
--- a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
+++ b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
@@ -218,6 +218,7 @@ abstract class AbstractFileStore<T> implements FileStore<T>
{
tableName,
commitUser,
partitionType,
+ options,
options.partitionDefaultName(),
pathFactory(),
snapshotManager(),
diff --git
a/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java
b/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java
index 5e43568aac..490bda9d4c 100644
---
a/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java
+++
b/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java
@@ -381,7 +381,9 @@ public class UnawareAppendTableCompactionCoordinator {
snapshotReader.withFilter(filter);
}
// drop stats to reduce memory
- snapshotReader.dropStats();
+ if (table.coreOptions().manifestDeleteFileDropStats()) {
+ snapshotReader.dropStats();
+ }
this.streamingMode = isStreaming;
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java
b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java
index 43957de8d6..14dfe75a6e 100644
---
a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java
+++
b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java
@@ -18,6 +18,7 @@
package org.apache.paimon.operation;
+import org.apache.paimon.CoreOptions;
import org.apache.paimon.Snapshot;
import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.compact.CompactDeletionFile;
@@ -96,13 +97,19 @@ public abstract class AbstractFileStoreWrite<T> implements
FileStoreWrite<T> {
@Nullable IndexMaintainer.Factory<T> indexFactory,
@Nullable DeletionVectorsMaintainer.Factory dvMaintainerFactory,
String tableName,
+ CoreOptions options,
int totalBuckets,
RowType partitionType,
int writerNumberMax,
boolean legacyPartitionName) {
this.snapshotManager = snapshotManager;
+ this.scan = scan;
// Statistic is useless in writer
- this.scan = scan == null ? null : scan.dropStats();
+ if (options.manifestDeleteFileDropStats()) {
+ if (this.scan != null) {
+ this.scan.dropStats();
+ }
+ }
this.indexFactory = indexFactory;
this.dvMaintainerFactory = dvMaintainerFactory;
this.totalBuckets = totalBuckets;
diff --git
a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java
b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java
index bbd9b27ee6..153f9f07e9 100644
---
a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java
+++
b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java
@@ -18,6 +18,7 @@
package org.apache.paimon.operation;
+import org.apache.paimon.CoreOptions;
import org.apache.paimon.Snapshot;
import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.data.BinaryRow;
@@ -146,6 +147,7 @@ public class FileStoreCommitImpl implements FileStoreCommit
{
String tableName,
String commitUser,
RowType partitionType,
+ CoreOptions options,
String partitionDefaultName,
FileStorePathFactory pathFactory,
SnapshotManager snapshotManager,
@@ -176,8 +178,11 @@ public class FileStoreCommitImpl implements
FileStoreCommit {
this.manifestFile = manifestFileFactory.create();
this.manifestList = manifestListFactory.create();
this.indexManifestFile = indexManifestFileFactory.create();
+ this.scan = scan;
// Stats in DELETE Manifest Entries is useless
- this.scan = scan.dropStats();
+ if (options.manifestDeleteFileDropStats()) {
+ this.scan.dropStats();
+ }
this.numBucket = numBucket;
this.manifestTargetSize = manifestTargetSize;
this.manifestFullCompactionSize = manifestFullCompactionSize;
diff --git
a/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java
b/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java
index ff99f06510..a2733121ee 100644
---
a/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java
+++
b/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java
@@ -73,6 +73,7 @@ public abstract class MemoryFileStoreWrite<T> extends
AbstractFileStoreWrite<T>
indexFactory,
dvMaintainerFactory,
tableName,
+ options,
options.bucket(),
partitionType,
options.writeMaxWritersToSpill(),
diff --git
a/paimon-core/src/test/java/org/apache/paimon/operation/FileStoreCommitTest.java
b/paimon-core/src/test/java/org/apache/paimon/operation/FileStoreCommitTest.java
index de4ee684b8..9e4ba30eb8 100644
---
a/paimon-core/src/test/java/org/apache/paimon/operation/FileStoreCommitTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/operation/FileStoreCommitTest.java
@@ -950,6 +950,7 @@ public class FileStoreCommitTest {
@Test
public void testDropStatsForOverwrite() throws Exception {
TestFileStore store = createStore(false);
+
store.options().toConfiguration().set(CoreOptions.MANIFEST_DELETE_FILE_DROP_STATS,
true);
List<KeyValue> keyValues = generateDataList(1);
BinaryRow partition = gen.getPartition(keyValues.get(0));
diff --git
a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala
index 466643b157..87583593e3 100644
---
a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala
+++
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala
@@ -94,8 +94,11 @@ trait PaimonCommand extends WithFileStoreTable with
ExpressionHelper with SQLCon
condition: Expression,
output: Seq[Attribute]): Seq[DataSplit] = {
// low level snapshot reader, it can not be affected by 'scan.mode'
+ val snapshotReader = table.newSnapshotReader()
// dropStats after filter push down
- val snapshotReader = table.newSnapshotReader().dropStats()
+ if (table.coreOptions().manifestDeleteFileDropStats()) {
+ snapshotReader.dropStats()
+ }
if (condition != TrueLiteral) {
val filter =
convertConditionToPaimonPredicate(condition, output, rowType,
ignoreFailure = true)