This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 094b62c66 [core] Stats mode should not work for partition stats (#1499)
094b62c66 is described below
commit 094b62c66e6b0cb64ebd23e691efd067a0a437ba
Author: Jingsong Lee <[email protected]>
AuthorDate: Wed Jul 5 15:38:32 2023 +0800
[core] Stats mode should not work for partition stats (#1499)
---
.../apache/paimon/format/TableStatsCollector.java | 5 ++
.../paimon/statistics/FieldStatsCollector.java | 7 ++
...ctor.java => TableFieldStatsExtractorTest.java} | 2 +-
.../java/org/apache/paimon/AbstractFileStore.java | 5 +-
.../org/apache/paimon/manifest/ManifestFile.java | 25 ++----
.../paimon/utils/StatsCollectorFactories.java | 8 --
.../apache/paimon/io/RollingFileWriterTest.java | 3 +-
.../paimon/manifest/ManifestFileMetaTestBase.java | 7 +-
.../apache/paimon/manifest/ManifestFileTest.java | 7 +-
.../org/apache/paimon/stats/StatsTableTest.java | 90 ++++++++++++++++++++++
.../format/orc/OrcTableStatsExtractorTest.java | 4 +-
.../parquet/ParquetTableStatsExtractorTest.java | 4 +-
12 files changed, 118 insertions(+), 49 deletions(-)
diff --git
a/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java
b/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java
index c8f7322a1..6f8661815 100644
---
a/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java
+++
b/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java
@@ -25,6 +25,7 @@ import org.apache.paimon.statistics.FieldStatsCollector;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.RowDataToObjectArrayConverter;
+import static
org.apache.paimon.statistics.FieldStatsCollector.createFullStatsFactories;
import static org.apache.paimon.utils.Preconditions.checkArgument;
/** Collector to extract statistics of each fields from a series of records. */
@@ -34,6 +35,10 @@ public class TableStatsCollector {
private final FieldStatsCollector[] statsCollectors;
private final Serializer<Object>[] fieldSerializers;
+ public TableStatsCollector(RowType rowType) {
+ this(rowType, createFullStatsFactories(rowType.getFieldCount()));
+ }
+
public TableStatsCollector(RowType rowType, FieldStatsCollector.Factory[]
collectorFactory) {
int numFields = rowType.getFieldCount();
checkArgument(
diff --git
a/paimon-common/src/main/java/org/apache/paimon/statistics/FieldStatsCollector.java
b/paimon-common/src/main/java/org/apache/paimon/statistics/FieldStatsCollector.java
index 99ce52285..52015d4c3 100644
---
a/paimon-common/src/main/java/org/apache/paimon/statistics/FieldStatsCollector.java
+++
b/paimon-common/src/main/java/org/apache/paimon/statistics/FieldStatsCollector.java
@@ -23,6 +23,7 @@ package org.apache.paimon.statistics;
import org.apache.paimon.data.serializer.Serializer;
import org.apache.paimon.format.FieldStats;
+import java.util.Arrays;
import java.util.regex.Matcher;
import static
org.apache.paimon.statistics.TruncateFieldStatsCollector.TRUNCATE_PATTERN;
@@ -80,4 +81,10 @@ public interface FieldStatsCollector {
throw new IllegalArgumentException("Unexpected option: " +
option);
}
}
+
+ static FieldStatsCollector.Factory[] createFullStatsFactories(int
numFields) {
+ FieldStatsCollector.Factory[] factories = new
FieldStatsCollector.Factory[numFields];
+ Arrays.fill(factories, (FieldStatsCollector.Factory)
FullFieldStatsCollector::new);
+ return factories;
+ }
}
diff --git
a/paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTestBaseCollector.java
b/paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTest.java
similarity index 99%
rename from
paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTestBaseCollector.java
rename to
paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTest.java
index 55c55625d..573f0de2f 100644
---
a/paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTestBaseCollector.java
+++
b/paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTest.java
@@ -63,7 +63,7 @@ import static
org.apache.paimon.types.DataTypeChecks.getPrecision;
import static org.assertj.core.api.Assertions.assertThat;
/** Tests for {@link TableStatsExtractor}. */
-public abstract class TableFieldStatsExtractorTestBaseCollector {
+public abstract class TableFieldStatsExtractorTest {
@TempDir java.nio.file.Path tempDir;
diff --git a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
index bce0d9b1e..558daa899 100644
--- a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
+++ b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
@@ -38,7 +38,6 @@ import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.FileStorePathFactory;
import org.apache.paimon.utils.SegmentsCache;
import org.apache.paimon.utils.SnapshotManager;
-import org.apache.paimon.utils.StatsCollectorFactories;
import org.apache.paimon.utils.TagManager;
import javax.annotation.Nullable;
@@ -105,9 +104,7 @@ public abstract class AbstractFileStore<T> implements
FileStore<T> {
options.manifestFormat(),
pathFactory(),
options.manifestTargetSize().getBytes(),
- forWrite ? writeManifestCache : null,
- StatsCollectorFactories.createStatsFactories(
- options, partitionType.getFieldNames()));
+ forWrite ? writeManifestCache : null);
}
@VisibleForTesting
diff --git
a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java
b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java
index e41184d1e..e5da04c94 100644
--- a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java
+++ b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java
@@ -29,7 +29,6 @@ import org.apache.paimon.fs.Path;
import org.apache.paimon.io.RollingFileWriter;
import org.apache.paimon.io.SingleFileWriter;
import org.apache.paimon.schema.SchemaManager;
-import org.apache.paimon.statistics.FieldStatsCollector;
import org.apache.paimon.stats.FieldStatsArraySerializer;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.FileStorePathFactory;
@@ -53,7 +52,6 @@ public class ManifestFile extends ObjectsFile<ManifestEntry> {
private final RowType partitionType;
private final FormatWriterFactory writerFactory;
private final long suggestedFileSize;
- private final FieldStatsCollector.Factory[] partitionStats;
private ManifestFile(
FileIO fileIO,
@@ -64,14 +62,12 @@ public class ManifestFile extends
ObjectsFile<ManifestEntry> {
FormatWriterFactory writerFactory,
PathFactory pathFactory,
long suggestedFileSize,
- @Nullable SegmentsCache<String> cache,
- FieldStatsCollector.Factory[] partitionStats) {
+ @Nullable SegmentsCache<String> cache) {
super(fileIO, serializer, readerFactory, writerFactory, pathFactory,
cache);
this.schemaManager = schemaManager;
this.partitionType = partitionType;
this.writerFactory = writerFactory;
this.suggestedFileSize = suggestedFileSize;
- this.partitionStats = partitionStats;
}
@VisibleForTesting
@@ -91,8 +87,7 @@ public class ManifestFile extends ObjectsFile<ManifestEntry> {
new ManifestEntryWriter(
writerFactory,
pathFactory.newPath(),
-
CoreOptions.FILE_COMPRESSION.defaultValue(),
- partitionStats),
+
CoreOptions.FILE_COMPRESSION.defaultValue()),
suggestedFileSize);
try {
writer.write(entries);
@@ -112,14 +107,10 @@ public class ManifestFile extends
ObjectsFile<ManifestEntry> {
private long numDeletedFiles = 0;
private long schemaId = Long.MIN_VALUE;
- ManifestEntryWriter(
- FormatWriterFactory factory,
- Path path,
- String fileCompression,
- FieldStatsCollector.Factory[] partitionStats) {
+ ManifestEntryWriter(FormatWriterFactory factory, Path path, String
fileCompression) {
super(ManifestFile.this.fileIO, factory, path, serializer::toRow,
fileCompression);
- this.partitionStatsCollector = new
TableStatsCollector(partitionType, partitionStats);
+ this.partitionStatsCollector = new
TableStatsCollector(partitionType);
this.partitionStatsSerializer = new
FieldStatsArraySerializer(partitionType);
}
@@ -166,7 +157,6 @@ public class ManifestFile extends
ObjectsFile<ManifestEntry> {
private final FileStorePathFactory pathFactory;
private final long suggestedFileSize;
@Nullable private final SegmentsCache<String> cache;
- private final FieldStatsCollector.Factory[] partitionStats;
public Factory(
FileIO fileIO,
@@ -175,8 +165,7 @@ public class ManifestFile extends
ObjectsFile<ManifestEntry> {
FileFormat fileFormat,
FileStorePathFactory pathFactory,
long suggestedFileSize,
- @Nullable SegmentsCache<String> cache,
- FieldStatsCollector.Factory[] partitionStats) {
+ @Nullable SegmentsCache<String> cache) {
this.fileIO = fileIO;
this.schemaManager = schemaManager;
this.partitionType = partitionType;
@@ -184,7 +173,6 @@ public class ManifestFile extends
ObjectsFile<ManifestEntry> {
this.pathFactory = pathFactory;
this.suggestedFileSize = suggestedFileSize;
this.cache = cache;
- this.partitionStats = partitionStats;
}
public ManifestFile create() {
@@ -198,8 +186,7 @@ public class ManifestFile extends
ObjectsFile<ManifestEntry> {
fileFormat.createWriterFactory(entryType),
pathFactory.manifestFileFactory(),
suggestedFileSize,
- cache,
- partitionStats);
+ cache);
}
}
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/utils/StatsCollectorFactories.java
b/paimon-core/src/main/java/org/apache/paimon/utils/StatsCollectorFactories.java
index 08b3a780c..7a3abc3d7 100644
---
a/paimon-core/src/main/java/org/apache/paimon/utils/StatsCollectorFactories.java
+++
b/paimon-core/src/main/java/org/apache/paimon/utils/StatsCollectorFactories.java
@@ -23,9 +23,7 @@ package org.apache.paimon.utils;
import org.apache.paimon.CoreOptions;
import org.apache.paimon.options.Options;
import org.apache.paimon.statistics.FieldStatsCollector;
-import org.apache.paimon.statistics.FullFieldStatsCollector;
-import java.util.Arrays;
import java.util.List;
import static org.apache.paimon.CoreOptions.FIELDS_PREFIX;
@@ -55,10 +53,4 @@ public class StatsCollectorFactories {
}
return modes;
}
-
- public static FieldStatsCollector.Factory[] createFullStatsFactories(int
numFields) {
- FieldStatsCollector.Factory[] factories = new
FieldStatsCollector.Factory[numFields];
- Arrays.fill(factories, (FieldStatsCollector.Factory)
FullFieldStatsCollector::new);
- return factories;
- }
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java
b/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java
index 78d1cbf99..8d42d20e8 100644
--- a/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java
@@ -25,6 +25,7 @@ import org.apache.paimon.format.FileFormat;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.local.LocalFileIO;
import org.apache.paimon.options.Options;
+import org.apache.paimon.statistics.FieldStatsCollector;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.IntType;
import org.apache.paimon.types.RowType;
@@ -79,7 +80,7 @@ public class RollingFileWriterTest {
fileFormat
.createStatsExtractor(
SCHEMA,
- StatsCollectorFactories
+ FieldStatsCollector
.createFullStatsFactories(
SCHEMA.getFieldCount()))
.orElse(null),
diff --git
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
index 5ff3f8734..e66a6eccb 100644
---
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
+++
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
@@ -32,12 +32,10 @@ import org.apache.paimon.schema.SchemaManager;
import org.apache.paimon.stats.StatsTestUtils;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.FileStorePathFactory;
-import org.apache.paimon.utils.StatsCollectorFactories;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
-import java.util.HashMap;
import java.util.List;
import java.util.stream.Collectors;
@@ -136,10 +134,7 @@ public abstract class ManifestFileMetaTestBase {
"default",
CoreOptions.FILE_FORMAT.defaultValue().toString()),
Long.MAX_VALUE,
- null,
- StatsCollectorFactories.createStatsFactories(
- new CoreOptions(new HashMap<>()),
- getPartitionType().getFieldNames()))
+ null)
.create();
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
index 105914ebc..5be026b7c 100644
--- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
@@ -29,14 +29,12 @@ import org.apache.paimon.schema.SchemaManager;
import org.apache.paimon.stats.StatsTestUtils;
import org.apache.paimon.utils.FailingFileIO;
import org.apache.paimon.utils.FileStorePathFactory;
-import org.apache.paimon.utils.StatsCollectorFactories;
import org.junit.jupiter.api.RepeatedTest;
import org.junit.jupiter.api.io.TempDir;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ThreadLocalRandom;
@@ -110,10 +108,7 @@ public class ManifestFileTest {
avro,
pathFactory,
suggestedFileSize,
- null,
- StatsCollectorFactories.createStatsFactories(
- new CoreOptions(new HashMap<>()),
- DEFAULT_PART_TYPE.getFieldNames()))
+ null)
.create();
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
b/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
new file mode 100644
index 000000000..23ff1e822
--- /dev/null
+++ b/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.stats;
+
+import org.apache.paimon.AbstractFileStore;
+import org.apache.paimon.catalog.Identifier;
+import org.apache.paimon.data.GenericRow;
+import org.apache.paimon.io.DataFileMeta;
+import org.apache.paimon.manifest.ManifestFile;
+import org.apache.paimon.manifest.ManifestFileMeta;
+import org.apache.paimon.manifest.ManifestList;
+import org.apache.paimon.options.Options;
+import org.apache.paimon.schema.Schema;
+import org.apache.paimon.table.AbstractFileStoreTable;
+import org.apache.paimon.table.Table;
+import org.apache.paimon.table.TableTestBase;
+import org.apache.paimon.types.DataTypes;
+
+import org.junit.jupiter.api.Test;
+
+import static org.apache.paimon.CoreOptions.METADATA_STATS_MODE;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Test for table stats mode. */
+public class StatsTableTest extends TableTestBase {
+
+ @Test
+ public void testPartitionStats() throws Exception {
+ Identifier identifier = identifier("T");
+ Options options = new Options();
+ options.set(METADATA_STATS_MODE, "NONE");
+ Schema schema =
+ Schema.newBuilder()
+ .column("pt", DataTypes.INT())
+ .column("pk", DataTypes.INT())
+ .column("col1", DataTypes.INT())
+ .partitionKeys("pt")
+ .primaryKey("pk", "pt")
+ .options(options.toMap())
+ .build();
+ catalog.createTable(identifier, schema, true);
+ Table table = catalog.getTable(identifier);
+
+ write(
+ table,
+ GenericRow.of(1, 1, 1),
+ GenericRow.of(1, 2, 1),
+ GenericRow.of(1, 3, 1),
+ GenericRow.of(2, 1, 1));
+
+ AbstractFileStoreTable storeTable = (AbstractFileStoreTable) table;
+ AbstractFileStore<?> store = (AbstractFileStore<?>) storeTable.store();
+ String manifestListFile =
storeTable.snapshotManager().latestSnapshot().deltaManifestList();
+
+ ManifestList manifestList = store.manifestListFactory().create();
+ ManifestFileMeta manifest = manifestList.read(manifestListFile).get(0);
+
+ // should have partition stats
+ BinaryTableStats partitionStats = manifest.partitionStats();
+ assertThat(partitionStats.min().getInt(0)).isEqualTo(1);
+ assertThat(partitionStats.max().getInt(0)).isEqualTo(2);
+
+ // should not have record stats because of NONE mode
+ ManifestFile manifestFile = store.manifestFileFactory().create();
+ DataFileMeta file =
manifestFile.read(manifest.fileName()).get(0).file();
+ BinaryTableStats recordStats = file.valueStats();
+ assertThat(recordStats.min().isNullAt(0)).isTrue();
+ assertThat(recordStats.min().isNullAt(1)).isTrue();
+ assertThat(recordStats.min().isNullAt(2)).isTrue();
+ assertThat(recordStats.max().isNullAt(0)).isTrue();
+ assertThat(recordStats.max().isNullAt(1)).isTrue();
+ assertThat(recordStats.max().isNullAt(2)).isTrue();
+ }
+}
diff --git
a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcTableStatsExtractorTest.java
b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcTableStatsExtractorTest.java
index 270d36fac..cee203a57 100644
---
a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcTableStatsExtractorTest.java
+++
b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcTableStatsExtractorTest.java
@@ -19,7 +19,7 @@
package org.apache.paimon.format.orc;
import org.apache.paimon.format.FileFormat;
-import org.apache.paimon.format.TableFieldStatsExtractorTestBaseCollector;
+import org.apache.paimon.format.TableFieldStatsExtractorTest;
import org.apache.paimon.format.orc.filter.OrcTableStatsExtractor;
import org.apache.paimon.options.Options;
import org.apache.paimon.types.ArrayType;
@@ -44,7 +44,7 @@ import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
/** Tests for {@link OrcTableStatsExtractor}. */
-public class OrcTableStatsExtractorTest extends
TableFieldStatsExtractorTestBaseCollector {
+public class OrcTableStatsExtractorTest extends TableFieldStatsExtractorTest {
@Override
protected FileFormat createFormat() {
diff --git
a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetTableStatsExtractorTest.java
b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetTableStatsExtractorTest.java
index be5c321d6..784833eec 100644
---
a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetTableStatsExtractorTest.java
+++
b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetTableStatsExtractorTest.java
@@ -20,7 +20,7 @@ package org.apache.paimon.format.parquet;
import org.apache.paimon.format.FieldStats;
import org.apache.paimon.format.FileFormat;
-import org.apache.paimon.format.TableFieldStatsExtractorTestBaseCollector;
+import org.apache.paimon.format.TableFieldStatsExtractorTest;
import org.apache.paimon.options.Options;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.BigIntType;
@@ -43,7 +43,7 @@ import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
/** Tests for {@link ParquetTableStatsExtractor}. */
-public class ParquetTableStatsExtractorTest extends
TableFieldStatsExtractorTestBaseCollector {
+public class ParquetTableStatsExtractorTest extends
TableFieldStatsExtractorTest {
@Override
protected FileFormat createFormat() {