This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new bfd0b28b5b [core] Validate retract records in PostponeBucketWriter (#5892)
bfd0b28b5b is described below
commit bfd0b28b5be301411b4838d92e4527959b76a78e
Author: Jingsong Lee <[email protected]>
AuthorDate: Tue Jul 15 10:49:34 2025 +0800
[core] Validate retract records in PostponeBucketWriter (#5892)
---
.../java/org/apache/paimon/KeyValueFileStore.java | 1 +
.../paimon/mergetree/compact/MergeFunction.java | 1 +
.../postpone/PostponeBucketFileStoreWrite.java | 5 ++++
.../paimon/postpone/PostponeBucketWriter.java | 18 +++++++++++
.../paimon/flink/PostponeBucketTableITCase.java | 35 ++++++++++++++++++++++
5 files changed, 60 insertions(+)
diff --git a/paimon-core/src/main/java/org/apache/paimon/KeyValueFileStore.java
b/paimon-core/src/main/java/org/apache/paimon/KeyValueFileStore.java
index 8b94890c92..d157393656 100644
--- a/paimon-core/src/main/java/org/apache/paimon/KeyValueFileStore.java
+++ b/paimon-core/src/main/java/org/apache/paimon/KeyValueFileStore.java
@@ -173,6 +173,7 @@ public class KeyValueFileStore extends
AbstractFileStore<KeyValue> {
partitionType,
keyType,
valueType,
+ mfFactory,
this::pathFactory,
newReaderFactoryBuilder(),
snapshotManager(),
diff --git
a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/MergeFunction.java
b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/MergeFunction.java
index 23efc046d7..1387465037 100644
---
a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/MergeFunction.java
+++
b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/MergeFunction.java
@@ -48,5 +48,6 @@ public interface MergeFunction<T> {
/** Get current merged value. */
T getResult();
+ /** Require copy input kv, this may cache kv in memory. */
boolean requireCopy();
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/postpone/PostponeBucketFileStoreWrite.java
b/paimon-core/src/main/java/org/apache/paimon/postpone/PostponeBucketFileStoreWrite.java
index 97b5dfe7c2..da7287ad96 100644
---
a/paimon-core/src/main/java/org/apache/paimon/postpone/PostponeBucketFileStoreWrite.java
+++
b/paimon-core/src/main/java/org/apache/paimon/postpone/PostponeBucketFileStoreWrite.java
@@ -29,6 +29,7 @@ import org.apache.paimon.io.DataFileMeta;
import org.apache.paimon.io.KeyValueFileReaderFactory;
import org.apache.paimon.io.KeyValueFileWriterFactory;
import org.apache.paimon.mergetree.compact.ConcatRecordReader;
+import org.apache.paimon.mergetree.compact.MergeFunctionFactory;
import org.apache.paimon.operation.FileStoreScan;
import org.apache.paimon.operation.FileStoreWrite;
import org.apache.paimon.operation.MemoryFileStoreWrite;
@@ -71,6 +72,7 @@ public class PostponeBucketFileStoreWrite extends
MemoryFileStoreWrite<KeyValue>
private final KeyValueFileWriterFactory.Builder writerFactoryBuilder;
private final FileIO fileIO;
private final FileStorePathFactory pathFactory;
+ private final MergeFunctionFactory<KeyValue> mfFactory;
private final KeyValueFileReaderFactory.Builder readerFactoryBuilder;
private boolean forceBufferSpill = false;
@@ -83,6 +85,7 @@ public class PostponeBucketFileStoreWrite extends
MemoryFileStoreWrite<KeyValue>
RowType partitionType,
RowType keyType,
RowType valueType,
+ MergeFunctionFactory<KeyValue> mfFactory,
BiFunction<CoreOptions, String, FileStorePathFactory>
formatPathFactory,
KeyValueFileReaderFactory.Builder readerFactoryBuilder,
SnapshotManager snapshotManager,
@@ -93,6 +96,7 @@ public class PostponeBucketFileStoreWrite extends
MemoryFileStoreWrite<KeyValue>
super(snapshotManager, scan, options, partitionType, null, null,
tableName);
this.fileIO = fileIO;
this.pathFactory = pathFactory;
+ this.mfFactory = mfFactory;
this.readerFactoryBuilder = readerFactoryBuilder;
Options newOptions = new Options(options.toMap());
@@ -189,6 +193,7 @@ public class PostponeBucketFileStoreWrite extends
MemoryFileStoreWrite<KeyValue>
options.spillCompressOptions(),
options.writeBufferSpillDiskSize(),
ioManager,
+ mfFactory.create(),
writerFactory,
files -> newFileRead(partition, bucket, files),
forceBufferSpill,
diff --git
a/paimon-core/src/main/java/org/apache/paimon/postpone/PostponeBucketWriter.java
b/paimon-core/src/main/java/org/apache/paimon/postpone/PostponeBucketWriter.java
index c41db8aac7..47256749b9 100644
---
a/paimon-core/src/main/java/org/apache/paimon/postpone/PostponeBucketWriter.java
+++
b/paimon-core/src/main/java/org/apache/paimon/postpone/PostponeBucketWriter.java
@@ -33,6 +33,7 @@ import org.apache.paimon.io.RollingFileWriter;
import org.apache.paimon.manifest.FileSource;
import org.apache.paimon.memory.MemoryOwner;
import org.apache.paimon.memory.MemorySegmentPool;
+import org.apache.paimon.mergetree.compact.MergeFunction;
import org.apache.paimon.options.MemorySize;
import org.apache.paimon.reader.RecordReaderIterator;
import org.apache.paimon.types.RowType;
@@ -55,6 +56,7 @@ public class PostponeBucketWriter implements
RecordWriter<KeyValue>, MemoryOwner
private final FileIO fileIO;
private final DataFilePathFactory pathFactory;
+ private final MergeFunction<KeyValue> mergeFunction;
private final KeyValueFileWriterFactory writerFactory;
private final List<DataFileMeta> files;
private final IOFunction<List<DataFileMeta>,
RecordReaderIterator<KeyValue>> fileRead;
@@ -64,6 +66,7 @@ public class PostponeBucketWriter implements
RecordWriter<KeyValue>, MemoryOwner
private SinkWriter<KeyValue> sinkWriter;
private MemorySegmentPool memorySegmentPool;
+ private boolean retractValidated = false;
public PostponeBucketWriter(
FileIO fileIO,
@@ -71,12 +74,14 @@ public class PostponeBucketWriter implements
RecordWriter<KeyValue>, MemoryOwner
CompressOptions spillCompression,
MemorySize maxDiskSize,
@Nullable IOManager ioManager,
+ MergeFunction<KeyValue> mergeFunction,
KeyValueFileWriterFactory writerFactory,
IOFunction<List<DataFileMeta>, RecordReaderIterator<KeyValue>>
fileRead,
boolean useWriteBuffer,
boolean spillable,
@Nullable CommitIncrement restoreIncrement) {
this.ioManager = ioManager;
+ this.mergeFunction = mergeFunction;
this.writerFactory = writerFactory;
this.fileRead = fileRead;
this.fileIO = fileIO;
@@ -99,6 +104,7 @@ public class PostponeBucketWriter implements
RecordWriter<KeyValue>, MemoryOwner
@Override
public void write(KeyValue record) throws Exception {
+ validateRetract(record);
boolean success = sinkWriter.write(record);
if (!success) {
flush();
@@ -112,6 +118,18 @@ public class PostponeBucketWriter implements
RecordWriter<KeyValue>, MemoryOwner
}
}
+ private void validateRetract(KeyValue kv) {
+ if (kv.valueKind().isRetract()) {
+ if (retractValidated) {
+ return;
+ }
+ mergeFunction.reset();
+ mergeFunction.add(kv);
+ mergeFunction.getResult();
+ retractValidated = true;
+ }
+ }
+
private void flush() throws Exception {
files.addAll(sinkWriter.flush());
}
diff --git
a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PostponeBucketTableITCase.java
b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PostponeBucketTableITCase.java
index d2572fb737..4a449e240a 100644
---
a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PostponeBucketTableITCase.java
+++
b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PostponeBucketTableITCase.java
@@ -35,12 +35,47 @@ import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicBoolean;
import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
/** IT cases for postpone bucket tables. */
public class PostponeBucketTableITCase extends AbstractTestBase {
private static final int TIMEOUT = 120;
+ @Test
+ public void testRetractOnPartialUpdate() {
+ String warehouse = getTempDirPath();
+ TableEnvironment tEnv =
+ tableEnvironmentBuilder()
+ .batchMode()
+ .setConf(TableConfigOptions.TABLE_DML_SYNC, true)
+ .build();
+
+ tEnv.executeSql(
+ "CREATE CATALOG mycat WITH (\n"
+ + " 'type' = 'paimon',\n"
+ + " 'warehouse' = '"
+ + warehouse
+ + "'\n"
+ + ")");
+ tEnv.executeSql("USE CATALOG mycat");
+ tEnv.executeSql(
+ "CREATE TABLE T (\n"
+ + " k INT,\n"
+ + " v1 INT,\n"
+ + " v2 INT,\n"
+ + " row_kind_col STRING,\n"
+ + " PRIMARY KEY (k) NOT ENFORCED\n"
+ + ") WITH (\n"
+ + " 'bucket' = '-2',\n"
+ + " 'merge-engine' = 'partial-update',\n"
+ + " 'rowkind.field' = 'row_kind_col'\n"
+ + ")");
+ assertThatThrownBy(() -> tEnv.executeSql("INSERT INTO T VALUES (1, 1, 1, '-D')").await())
+ .rootCause()
+ .hasMessageContaining("By default, Partial update can not accept delete records");
+ }
+
@Test
public void testWriteThenCompact() throws Exception {
String warehouse = getTempDirPath();