This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 094b62c66 [core] Stats mode should not work for partition stats (#1499)
094b62c66 is described below

commit 094b62c66e6b0cb64ebd23e691efd067a0a437ba
Author: Jingsong Lee <[email protected]>
AuthorDate: Wed Jul 5 15:38:32 2023 +0800

    [core] Stats mode should not work for partition stats (#1499)
---
 .../apache/paimon/format/TableStatsCollector.java  |  5 ++
 .../paimon/statistics/FieldStatsCollector.java     |  7 ++
 ...ctor.java => TableFieldStatsExtractorTest.java} |  2 +-
 .../java/org/apache/paimon/AbstractFileStore.java  |  5 +-
 .../org/apache/paimon/manifest/ManifestFile.java   | 25 ++----
 .../paimon/utils/StatsCollectorFactories.java      |  8 --
 .../apache/paimon/io/RollingFileWriterTest.java    |  3 +-
 .../paimon/manifest/ManifestFileMetaTestBase.java  |  7 +-
 .../apache/paimon/manifest/ManifestFileTest.java   |  7 +-
 .../org/apache/paimon/stats/StatsTableTest.java    | 90 ++++++++++++++++++++++
 .../format/orc/OrcTableStatsExtractorTest.java     |  4 +-
 .../parquet/ParquetTableStatsExtractorTest.java    |  4 +-
 12 files changed, 118 insertions(+), 49 deletions(-)

diff --git 
a/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java 
b/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java
index c8f7322a1..6f8661815 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java
@@ -25,6 +25,7 @@ import org.apache.paimon.statistics.FieldStatsCollector;
 import org.apache.paimon.types.RowType;
 import org.apache.paimon.utils.RowDataToObjectArrayConverter;
 
+import static 
org.apache.paimon.statistics.FieldStatsCollector.createFullStatsFactories;
 import static org.apache.paimon.utils.Preconditions.checkArgument;
 
 /** Collector to extract statistics of each fields from a series of records. */
@@ -34,6 +35,10 @@ public class TableStatsCollector {
     private final FieldStatsCollector[] statsCollectors;
     private final Serializer<Object>[] fieldSerializers;
 
+    public TableStatsCollector(RowType rowType) {
+        this(rowType, createFullStatsFactories(rowType.getFieldCount()));
+    }
+
     public TableStatsCollector(RowType rowType, FieldStatsCollector.Factory[] 
collectorFactory) {
         int numFields = rowType.getFieldCount();
         checkArgument(
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/statistics/FieldStatsCollector.java
 
b/paimon-common/src/main/java/org/apache/paimon/statistics/FieldStatsCollector.java
index 99ce52285..52015d4c3 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/statistics/FieldStatsCollector.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/statistics/FieldStatsCollector.java
@@ -23,6 +23,7 @@ package org.apache.paimon.statistics;
 import org.apache.paimon.data.serializer.Serializer;
 import org.apache.paimon.format.FieldStats;
 
+import java.util.Arrays;
 import java.util.regex.Matcher;
 
 import static 
org.apache.paimon.statistics.TruncateFieldStatsCollector.TRUNCATE_PATTERN;
@@ -80,4 +81,10 @@ public interface FieldStatsCollector {
                 throw new IllegalArgumentException("Unexpected option: " + 
option);
         }
     }
+
+    static FieldStatsCollector.Factory[] createFullStatsFactories(int 
numFields) {
+        FieldStatsCollector.Factory[] factories = new 
FieldStatsCollector.Factory[numFields];
+        Arrays.fill(factories, (FieldStatsCollector.Factory) 
FullFieldStatsCollector::new);
+        return factories;
+    }
 }
diff --git 
a/paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTestBaseCollector.java
 
b/paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTest.java
similarity index 99%
rename from 
paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTestBaseCollector.java
rename to 
paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTest.java
index 55c55625d..573f0de2f 100644
--- 
a/paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTestBaseCollector.java
+++ 
b/paimon-common/src/test/java/org/apache/paimon/format/TableFieldStatsExtractorTest.java
@@ -63,7 +63,7 @@ import static 
org.apache.paimon.types.DataTypeChecks.getPrecision;
 import static org.assertj.core.api.Assertions.assertThat;
 
 /** Tests for {@link TableStatsExtractor}. */
-public abstract class TableFieldStatsExtractorTestBaseCollector {
+public abstract class TableFieldStatsExtractorTest {
 
     @TempDir java.nio.file.Path tempDir;
 
diff --git a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java 
b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
index bce0d9b1e..558daa899 100644
--- a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
+++ b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
@@ -38,7 +38,6 @@ import org.apache.paimon.types.RowType;
 import org.apache.paimon.utils.FileStorePathFactory;
 import org.apache.paimon.utils.SegmentsCache;
 import org.apache.paimon.utils.SnapshotManager;
-import org.apache.paimon.utils.StatsCollectorFactories;
 import org.apache.paimon.utils.TagManager;
 
 import javax.annotation.Nullable;
@@ -105,9 +104,7 @@ public abstract class AbstractFileStore<T> implements 
FileStore<T> {
                 options.manifestFormat(),
                 pathFactory(),
                 options.manifestTargetSize().getBytes(),
-                forWrite ? writeManifestCache : null,
-                StatsCollectorFactories.createStatsFactories(
-                        options, partitionType.getFieldNames()));
+                forWrite ? writeManifestCache : null);
     }
 
     @VisibleForTesting
diff --git 
a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java 
b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java
index e41184d1e..e5da04c94 100644
--- a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java
+++ b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java
@@ -29,7 +29,6 @@ import org.apache.paimon.fs.Path;
 import org.apache.paimon.io.RollingFileWriter;
 import org.apache.paimon.io.SingleFileWriter;
 import org.apache.paimon.schema.SchemaManager;
-import org.apache.paimon.statistics.FieldStatsCollector;
 import org.apache.paimon.stats.FieldStatsArraySerializer;
 import org.apache.paimon.types.RowType;
 import org.apache.paimon.utils.FileStorePathFactory;
@@ -53,7 +52,6 @@ public class ManifestFile extends ObjectsFile<ManifestEntry> {
     private final RowType partitionType;
     private final FormatWriterFactory writerFactory;
     private final long suggestedFileSize;
-    private final FieldStatsCollector.Factory[] partitionStats;
 
     private ManifestFile(
             FileIO fileIO,
@@ -64,14 +62,12 @@ public class ManifestFile extends 
ObjectsFile<ManifestEntry> {
             FormatWriterFactory writerFactory,
             PathFactory pathFactory,
             long suggestedFileSize,
-            @Nullable SegmentsCache<String> cache,
-            FieldStatsCollector.Factory[] partitionStats) {
+            @Nullable SegmentsCache<String> cache) {
         super(fileIO, serializer, readerFactory, writerFactory, pathFactory, 
cache);
         this.schemaManager = schemaManager;
         this.partitionType = partitionType;
         this.writerFactory = writerFactory;
         this.suggestedFileSize = suggestedFileSize;
-        this.partitionStats = partitionStats;
     }
 
     @VisibleForTesting
@@ -91,8 +87,7 @@ public class ManifestFile extends ObjectsFile<ManifestEntry> {
                                 new ManifestEntryWriter(
                                         writerFactory,
                                         pathFactory.newPath(),
-                                        
CoreOptions.FILE_COMPRESSION.defaultValue(),
-                                        partitionStats),
+                                        
CoreOptions.FILE_COMPRESSION.defaultValue()),
                         suggestedFileSize);
         try {
             writer.write(entries);
@@ -112,14 +107,10 @@ public class ManifestFile extends 
ObjectsFile<ManifestEntry> {
         private long numDeletedFiles = 0;
         private long schemaId = Long.MIN_VALUE;
 
-        ManifestEntryWriter(
-                FormatWriterFactory factory,
-                Path path,
-                String fileCompression,
-                FieldStatsCollector.Factory[] partitionStats) {
+        ManifestEntryWriter(FormatWriterFactory factory, Path path, String 
fileCompression) {
             super(ManifestFile.this.fileIO, factory, path, serializer::toRow, 
fileCompression);
 
-            this.partitionStatsCollector = new 
TableStatsCollector(partitionType, partitionStats);
+            this.partitionStatsCollector = new 
TableStatsCollector(partitionType);
             this.partitionStatsSerializer = new 
FieldStatsArraySerializer(partitionType);
         }
 
@@ -166,7 +157,6 @@ public class ManifestFile extends 
ObjectsFile<ManifestEntry> {
         private final FileStorePathFactory pathFactory;
         private final long suggestedFileSize;
         @Nullable private final SegmentsCache<String> cache;
-        private final FieldStatsCollector.Factory[] partitionStats;
 
         public Factory(
                 FileIO fileIO,
@@ -175,8 +165,7 @@ public class ManifestFile extends 
ObjectsFile<ManifestEntry> {
                 FileFormat fileFormat,
                 FileStorePathFactory pathFactory,
                 long suggestedFileSize,
-                @Nullable SegmentsCache<String> cache,
-                FieldStatsCollector.Factory[] partitionStats) {
+                @Nullable SegmentsCache<String> cache) {
             this.fileIO = fileIO;
             this.schemaManager = schemaManager;
             this.partitionType = partitionType;
@@ -184,7 +173,6 @@ public class ManifestFile extends 
ObjectsFile<ManifestEntry> {
             this.pathFactory = pathFactory;
             this.suggestedFileSize = suggestedFileSize;
             this.cache = cache;
-            this.partitionStats = partitionStats;
         }
 
         public ManifestFile create() {
@@ -198,8 +186,7 @@ public class ManifestFile extends 
ObjectsFile<ManifestEntry> {
                     fileFormat.createWriterFactory(entryType),
                     pathFactory.manifestFileFactory(),
                     suggestedFileSize,
-                    cache,
-                    partitionStats);
+                    cache);
         }
     }
 }
diff --git 
a/paimon-core/src/main/java/org/apache/paimon/utils/StatsCollectorFactories.java
 
b/paimon-core/src/main/java/org/apache/paimon/utils/StatsCollectorFactories.java
index 08b3a780c..7a3abc3d7 100644
--- 
a/paimon-core/src/main/java/org/apache/paimon/utils/StatsCollectorFactories.java
+++ 
b/paimon-core/src/main/java/org/apache/paimon/utils/StatsCollectorFactories.java
@@ -23,9 +23,7 @@ package org.apache.paimon.utils;
 import org.apache.paimon.CoreOptions;
 import org.apache.paimon.options.Options;
 import org.apache.paimon.statistics.FieldStatsCollector;
-import org.apache.paimon.statistics.FullFieldStatsCollector;
 
-import java.util.Arrays;
 import java.util.List;
 
 import static org.apache.paimon.CoreOptions.FIELDS_PREFIX;
@@ -55,10 +53,4 @@ public class StatsCollectorFactories {
         }
         return modes;
     }
-
-    public static FieldStatsCollector.Factory[] createFullStatsFactories(int 
numFields) {
-        FieldStatsCollector.Factory[] factories = new 
FieldStatsCollector.Factory[numFields];
-        Arrays.fill(factories, (FieldStatsCollector.Factory) 
FullFieldStatsCollector::new);
-        return factories;
-    }
 }
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java 
b/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java
index 78d1cbf99..8d42d20e8 100644
--- a/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java
@@ -25,6 +25,7 @@ import org.apache.paimon.format.FileFormat;
 import org.apache.paimon.fs.Path;
 import org.apache.paimon.fs.local.LocalFileIO;
 import org.apache.paimon.options.Options;
+import org.apache.paimon.statistics.FieldStatsCollector;
 import org.apache.paimon.types.DataType;
 import org.apache.paimon.types.IntType;
 import org.apache.paimon.types.RowType;
@@ -79,7 +80,7 @@ public class RollingFileWriterTest {
                                         fileFormat
                                                 .createStatsExtractor(
                                                         SCHEMA,
-                                                        StatsCollectorFactories
+                                                        FieldStatsCollector
                                                                 
.createFullStatsFactories(
                                                                         
SCHEMA.getFieldCount()))
                                                 .orElse(null),
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
 
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
index 5ff3f8734..e66a6eccb 100644
--- 
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
+++ 
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java
@@ -32,12 +32,10 @@ import org.apache.paimon.schema.SchemaManager;
 import org.apache.paimon.stats.StatsTestUtils;
 import org.apache.paimon.types.RowType;
 import org.apache.paimon.utils.FileStorePathFactory;
-import org.apache.paimon.utils.StatsCollectorFactories;
 
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
 import java.util.stream.Collectors;
 
@@ -136,10 +134,7 @@ public abstract class ManifestFileMetaTestBase {
                                 "default",
                                 
CoreOptions.FILE_FORMAT.defaultValue().toString()),
                         Long.MAX_VALUE,
-                        null,
-                        StatsCollectorFactories.createStatsFactories(
-                                new CoreOptions(new HashMap<>()),
-                                getPartitionType().getFieldNames()))
+                        null)
                 .create();
     }
 
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java 
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
index 105914ebc..5be026b7c 100644
--- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java
@@ -29,14 +29,12 @@ import org.apache.paimon.schema.SchemaManager;
 import org.apache.paimon.stats.StatsTestUtils;
 import org.apache.paimon.utils.FailingFileIO;
 import org.apache.paimon.utils.FileStorePathFactory;
-import org.apache.paimon.utils.StatsCollectorFactories;
 
 import org.junit.jupiter.api.RepeatedTest;
 import org.junit.jupiter.api.io.TempDir;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.UUID;
 import java.util.concurrent.ThreadLocalRandom;
@@ -110,10 +108,7 @@ public class ManifestFileTest {
                         avro,
                         pathFactory,
                         suggestedFileSize,
-                        null,
-                        StatsCollectorFactories.createStatsFactories(
-                                new CoreOptions(new HashMap<>()),
-                                DEFAULT_PART_TYPE.getFieldNames()))
+                        null)
                 .create();
     }
 
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java 
b/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
new file mode 100644
index 000000000..23ff1e822
--- /dev/null
+++ b/paimon-core/src/test/java/org/apache/paimon/stats/StatsTableTest.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.stats;
+
+import org.apache.paimon.AbstractFileStore;
+import org.apache.paimon.catalog.Identifier;
+import org.apache.paimon.data.GenericRow;
+import org.apache.paimon.io.DataFileMeta;
+import org.apache.paimon.manifest.ManifestFile;
+import org.apache.paimon.manifest.ManifestFileMeta;
+import org.apache.paimon.manifest.ManifestList;
+import org.apache.paimon.options.Options;
+import org.apache.paimon.schema.Schema;
+import org.apache.paimon.table.AbstractFileStoreTable;
+import org.apache.paimon.table.Table;
+import org.apache.paimon.table.TableTestBase;
+import org.apache.paimon.types.DataTypes;
+
+import org.junit.jupiter.api.Test;
+
+import static org.apache.paimon.CoreOptions.METADATA_STATS_MODE;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Test for table stats mode. */
+public class StatsTableTest extends TableTestBase {
+
+    @Test
+    public void testPartitionStats() throws Exception {
+        Identifier identifier = identifier("T");
+        Options options = new Options();
+        options.set(METADATA_STATS_MODE, "NONE");
+        Schema schema =
+                Schema.newBuilder()
+                        .column("pt", DataTypes.INT())
+                        .column("pk", DataTypes.INT())
+                        .column("col1", DataTypes.INT())
+                        .partitionKeys("pt")
+                        .primaryKey("pk", "pt")
+                        .options(options.toMap())
+                        .build();
+        catalog.createTable(identifier, schema, true);
+        Table table = catalog.getTable(identifier);
+
+        write(
+                table,
+                GenericRow.of(1, 1, 1),
+                GenericRow.of(1, 2, 1),
+                GenericRow.of(1, 3, 1),
+                GenericRow.of(2, 1, 1));
+
+        AbstractFileStoreTable storeTable = (AbstractFileStoreTable) table;
+        AbstractFileStore<?> store = (AbstractFileStore<?>) storeTable.store();
+        String manifestListFile = 
storeTable.snapshotManager().latestSnapshot().deltaManifestList();
+
+        ManifestList manifestList = store.manifestListFactory().create();
+        ManifestFileMeta manifest = manifestList.read(manifestListFile).get(0);
+
+        // should have partition stats
+        BinaryTableStats partitionStats = manifest.partitionStats();
+        assertThat(partitionStats.min().getInt(0)).isEqualTo(1);
+        assertThat(partitionStats.max().getInt(0)).isEqualTo(2);
+
+        // should not have record stats because of NONE mode
+        ManifestFile manifestFile = store.manifestFileFactory().create();
+        DataFileMeta file = 
manifestFile.read(manifest.fileName()).get(0).file();
+        BinaryTableStats recordStats = file.valueStats();
+        assertThat(recordStats.min().isNullAt(0)).isTrue();
+        assertThat(recordStats.min().isNullAt(1)).isTrue();
+        assertThat(recordStats.min().isNullAt(2)).isTrue();
+        assertThat(recordStats.max().isNullAt(0)).isTrue();
+        assertThat(recordStats.max().isNullAt(1)).isTrue();
+        assertThat(recordStats.max().isNullAt(2)).isTrue();
+    }
+}
diff --git 
a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcTableStatsExtractorTest.java
 
b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcTableStatsExtractorTest.java
index 270d36fac..cee203a57 100644
--- 
a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcTableStatsExtractorTest.java
+++ 
b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcTableStatsExtractorTest.java
@@ -19,7 +19,7 @@
 package org.apache.paimon.format.orc;
 
 import org.apache.paimon.format.FileFormat;
-import org.apache.paimon.format.TableFieldStatsExtractorTestBaseCollector;
+import org.apache.paimon.format.TableFieldStatsExtractorTest;
 import org.apache.paimon.format.orc.filter.OrcTableStatsExtractor;
 import org.apache.paimon.options.Options;
 import org.apache.paimon.types.ArrayType;
@@ -44,7 +44,7 @@ import org.apache.paimon.types.VarBinaryType;
 import org.apache.paimon.types.VarCharType;
 
 /** Tests for {@link OrcTableStatsExtractor}. */
-public class OrcTableStatsExtractorTest extends 
TableFieldStatsExtractorTestBaseCollector {
+public class OrcTableStatsExtractorTest extends TableFieldStatsExtractorTest {
 
     @Override
     protected FileFormat createFormat() {
diff --git 
a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetTableStatsExtractorTest.java
 
b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetTableStatsExtractorTest.java
index be5c321d6..784833eec 100644
--- 
a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetTableStatsExtractorTest.java
+++ 
b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetTableStatsExtractorTest.java
@@ -20,7 +20,7 @@ package org.apache.paimon.format.parquet;
 
 import org.apache.paimon.format.FieldStats;
 import org.apache.paimon.format.FileFormat;
-import org.apache.paimon.format.TableFieldStatsExtractorTestBaseCollector;
+import org.apache.paimon.format.TableFieldStatsExtractorTest;
 import org.apache.paimon.options.Options;
 import org.apache.paimon.types.ArrayType;
 import org.apache.paimon.types.BigIntType;
@@ -43,7 +43,7 @@ import org.apache.paimon.types.VarBinaryType;
 import org.apache.paimon.types.VarCharType;
 
 /** Tests for {@link ParquetTableStatsExtractor}. */
-public class ParquetTableStatsExtractorTest extends 
TableFieldStatsExtractorTestBaseCollector {
+public class ParquetTableStatsExtractorTest extends 
TableFieldStatsExtractorTest {
 
     @Override
     protected FileFormat createFormat() {

Reply via email to