This is an automated email from the ASF dual-hosted git repository. lzljs3620320 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
commit b498ec9b00b31952f41535d3c6b965a421b57401 Author: JingsongLi <[email protected]> AuthorDate: Tue Jul 4 12:10:58 2023 +0800 [bug] Fix bug in FieldStatsCollectorUtils --- .../org/apache/paimon/format/TableStatsCollector.java | 19 +++++++++++++------ .../java/org/apache/paimon/AbstractFileStore.java | 5 +++-- .../org/apache/paimon/io/KeyValueDataFileWriter.java | 4 ++-- .../apache/paimon/io/KeyValueFileWriterFactory.java | 4 ++-- .../paimon/operation/AppendOnlyFileStoreWrite.java | 5 +++-- ...{StatsUtils.java => FieldStatsCollectorUtils.java} | 6 ++---- .../apache/paimon/append/AppendOnlyWriterTest.java | 4 ++-- .../apache/paimon/format/FileFormatSuffixTest.java | 4 ++-- .../org/apache/paimon/io/RollingFileWriterTest.java | 4 ++-- .../paimon/manifest/ManifestFileMetaTestBase.java | 4 ++-- .../org/apache/paimon/manifest/ManifestFileTest.java | 4 ++-- .../paimon/utils/FieldStatsCollectorUtilsTest.java | 5 +++-- 12 files changed, 38 insertions(+), 30 deletions(-) diff --git a/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java b/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java index 0db227389..4e84a0309 100644 --- a/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java +++ b/paimon-common/src/main/java/org/apache/paimon/format/TableStatsCollector.java @@ -25,17 +25,24 @@ import org.apache.paimon.statistics.FieldStatsCollector; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.RowDataToObjectArrayConverter; +import static org.apache.paimon.utils.Preconditions.checkArgument; + /** Collector to extract statistics of each fields from a series of records. */ public class TableStatsCollector { private final RowDataToObjectArrayConverter converter; - private final FieldStatsCollector[] stats; + private final FieldStatsCollector[] statsCollectors; private final Serializer<Object>[] fieldSerializers; - public TableStatsCollector(RowType rowType, FieldStatsCollector[] stats) { + public TableStatsCollector(RowType rowType, FieldStatsCollector[] statsCollectors) { int numFields = rowType.getFieldCount(); + checkArgument( + numFields == statsCollectors.length, + "numFields %s should equal to stats length %s.", + numFields, + statsCollectors.length); this.converter = new RowDataToObjectArrayConverter(rowType); - this.stats = stats; + this.statsCollectors = statsCollectors; this.fieldSerializers = new Serializer[numFields]; for (int i = 0; i < numFields; i++) { fieldSerializers[i] = InternalSerializers.create(rowType.getTypeAt(i)); @@ -51,16 +58,16 @@ public class TableStatsCollector { public void collect(InternalRow row) { Object[] objects = converter.convert(row); for (int i = 0; i < row.getFieldCount(); i++) { - FieldStatsCollector collector = stats[i]; + FieldStatsCollector collector = statsCollectors[i]; Object obj = objects[i]; collector.collect(obj, fieldSerializers[i]); } } public FieldStats[] extract() { - FieldStats[] stats = new FieldStats[this.stats.length]; + FieldStats[] stats = new FieldStats[this.statsCollectors.length]; for (int i = 0; i < stats.length; i++) { - stats[i] = this.stats[i].result(); + stats[i] = this.statsCollectors[i].result(); } return stats; } diff --git a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java index 883eb2611..c6f924f0f 100644 --- a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java +++ b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java @@ -35,10 +35,10 @@ import org.apache.paimon.operation.TagFileKeeper; import org.apache.paimon.options.MemorySize; import org.apache.paimon.schema.SchemaManager; import org.apache.paimon.types.RowType; +import org.apache.paimon.utils.FieldStatsCollectorUtils; import org.apache.paimon.utils.FileStorePathFactory; import org.apache.paimon.utils.SegmentsCache; import org.apache.paimon.utils.SnapshotManager; -import org.apache.paimon.utils.StatsUtils; import org.apache.paimon.utils.TagManager; import javax.annotation.Nullable; @@ -106,7 +106,8 @@ public abstract class AbstractFileStore<T> implements FileStore<T> { pathFactory(), options.manifestTargetSize().getBytes(), forWrite ? writeManifestCache : null, - StatsUtils.getFieldsStatsMode(options, partitionType.getFieldNames())); + FieldStatsCollectorUtils.getFieldsStatsMode( + options, partitionType.getFieldNames())); } @VisibleForTesting diff --git a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java index 1f7b16422..27cf60c4f 100644 --- a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java +++ b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java @@ -31,7 +31,7 @@ import org.apache.paimon.fs.Path; import org.apache.paimon.stats.BinaryTableStats; import org.apache.paimon.stats.FieldStatsArraySerializer; import org.apache.paimon.types.RowType; -import org.apache.paimon.utils.StatsUtils; +import org.apache.paimon.utils.FieldStatsCollectorUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -88,7 +88,7 @@ public class KeyValueDataFileWriter KeyValue.schema(keyType, valueType), tableStatsExtractor, compression, - StatsUtils.getFieldsStatsMode( + FieldStatsCollectorUtils.getFieldsStatsMode( options, KeyValue.schema(keyType, valueType).getFieldNames())); this.keyType = keyType; diff --git a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileWriterFactory.java b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileWriterFactory.java index 87dc5d88f..0c8f78cd5 100644 --- a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileWriterFactory.java +++ b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileWriterFactory.java @@ -29,8 +29,8 @@ import org.apache.paimon.format.TableStatsExtractor; import org.apache.paimon.fs.FileIO; import org.apache.paimon.fs.Path; import org.apache.paimon.types.RowType; +import org.apache.paimon.utils.FieldStatsCollectorUtils; import org.apache.paimon.utils.FileStorePathFactory; -import org.apache.paimon.utils.StatsUtils; import javax.annotation.Nullable; @@ -187,7 +187,7 @@ public class KeyValueFileWriterFactory { fileFormat .createStatsExtractor( recordType, - StatsUtils.getFieldsStatsMode( + FieldStatsCollectorUtils.getFieldsStatsMode( options, recordType.getFieldNames())) .orElse(null), pathFactory.createDataFilePathFactory(partition, bucket), diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java index f22572676..eb24e4b5e 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java @@ -37,11 +37,11 @@ import org.apache.paimon.table.BucketMode; import org.apache.paimon.table.source.DataSplit; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.CommitIncrement; +import org.apache.paimon.utils.FieldStatsCollectorUtils; import org.apache.paimon.utils.FileStorePathFactory; import org.apache.paimon.utils.LongCounter; import org.apache.paimon.utils.RecordWriter; import org.apache.paimon.utils.SnapshotManager; -import org.apache.paimon.utils.StatsUtils; import javax.annotation.Nullable; @@ -95,7 +95,8 @@ public class AppendOnlyFileStoreWrite extends AbstractFileStoreWrite<InternalRow this.skipCompaction = options.writeOnly(); this.assertDisorder = options.toConfiguration().get(APPEND_ONLY_ASSERT_DISORDER); this.fileCompression = options.fileCompression(); - this.statsCollectors = StatsUtils.getFieldsStatsMode(options, rowType.getFieldNames()); + this.statsCollectors = + FieldStatsCollectorUtils.getFieldsStatsMode(options, rowType.getFieldNames()); } @Override diff --git a/paimon-core/src/main/java/org/apache/paimon/utils/StatsUtils.java b/paimon-core/src/main/java/org/apache/paimon/utils/FieldStatsCollectorUtils.java similarity index 92% rename from paimon-core/src/main/java/org/apache/paimon/utils/StatsUtils.java rename to paimon-core/src/main/java/org/apache/paimon/utils/FieldStatsCollectorUtils.java index ab3d5e1d7..5828462d9 100644 --- a/paimon-core/src/main/java/org/apache/paimon/utils/StatsUtils.java +++ b/paimon-core/src/main/java/org/apache/paimon/utils/FieldStatsCollectorUtils.java @@ -31,15 +31,13 @@ import static org.apache.paimon.CoreOptions.STATS_MODE_SUFFIX; import static org.apache.paimon.options.ConfigOptions.key; /** The stats utils. */ -public class StatsUtils { +public class FieldStatsCollectorUtils { public static FieldStatsCollector[] getFieldsStatsMode( CoreOptions options, List<String> fields) { Options cfg = options.toConfiguration(); - FieldStatsCollector defaultMode = FieldStatsCollector.from(cfg.get(CoreOptions.STATS_MODE)); FieldStatsCollector[] modes = new FieldStatsCollector[fields.size()]; for (int i = 0; i < fields.size(); i++) { - String fieldMode = cfg.get( key(String.format( @@ -50,7 +48,7 @@ public class StatsUtils { if (fieldMode != null) { modes[i] = FieldStatsCollector.from(fieldMode); } else { - modes[i] = defaultMode; + modes[i] = FieldStatsCollector.from(cfg.get(CoreOptions.STATS_MODE)); } } return modes; diff --git a/paimon-core/src/test/java/org/apache/paimon/append/AppendOnlyWriterTest.java b/paimon-core/src/test/java/org/apache/paimon/append/AppendOnlyWriterTest.java index 92a8cb531..2720252d0 100644 --- a/paimon-core/src/test/java/org/apache/paimon/append/AppendOnlyWriterTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/append/AppendOnlyWriterTest.java @@ -38,9 +38,9 @@ import org.apache.paimon.types.RowType; import org.apache.paimon.types.VarCharType; import org.apache.paimon.utils.CommitIncrement; import org.apache.paimon.utils.ExecutorThreadFactory; +import org.apache.paimon.utils.FieldStatsCollectorUtils; import org.apache.paimon.utils.Pair; import org.apache.paimon.utils.RecordWriter; -import org.apache.paimon.utils.StatsUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -320,7 +320,7 @@ public class AppendOnlyWriterTest { pathFactory, null, CoreOptions.FILE_COMPRESSION.defaultValue(), - StatsUtils.getFieldsStatsMode( + FieldStatsCollectorUtils.getFieldsStatsMode( new CoreOptions(new HashMap<>()), AppendOnlyWriterTest.SCHEMA.getFieldNames())); return Pair.of(writer, compactManager.allFiles()); diff --git a/paimon-core/src/test/java/org/apache/paimon/format/FileFormatSuffixTest.java b/paimon-core/src/test/java/org/apache/paimon/format/FileFormatSuffixTest.java index 13caec6b7..604998bc8 100644 --- a/paimon-core/src/test/java/org/apache/paimon/format/FileFormatSuffixTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/format/FileFormatSuffixTest.java @@ -35,7 +35,7 @@ import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; import org.apache.paimon.types.VarCharType; import org.apache.paimon.utils.CommitIncrement; -import org.apache.paimon.utils.StatsUtils; +import org.apache.paimon.utils.FieldStatsCollectorUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -77,7 +77,7 @@ public class FileFormatSuffixTest extends KeyValueFileReadWriteTest { dataFilePathFactory, null, CoreOptions.FILE_COMPRESSION.defaultValue(), - StatsUtils.getFieldsStatsMode( + FieldStatsCollectorUtils.getFieldsStatsMode( new CoreOptions(new HashMap<>()), SCHEMA.getFieldNames())); appendOnlyWriter.write( GenericRow.of(1, BinaryString.fromString("aaa"), BinaryString.fromString("1"))); diff --git a/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java b/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java index 7b273b693..e012c7ab1 100644 --- a/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/io/RollingFileWriterTest.java @@ -30,8 +30,8 @@ import org.apache.paimon.statistics.FullFieldStatsCollector; import org.apache.paimon.types.DataType; import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; +import org.apache.paimon.utils.FieldStatsCollectorUtils; import org.apache.paimon.utils.LongCounter; -import org.apache.paimon.utils.StatsUtils; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.junit.jupiter.api.io.TempDir; @@ -92,7 +92,7 @@ public class RollingFileWriterTest { 0L, new LongCounter(0), CoreOptions.FILE_COMPRESSION.defaultValue(), - StatsUtils.getFieldsStatsMode( + FieldStatsCollectorUtils.getFieldsStatsMode( new CoreOptions(new HashMap<>()), SCHEMA.getFieldNames())), TARGET_FILE_SIZE); diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java index a4ea867fa..b953f88cd 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTestBase.java @@ -31,8 +31,8 @@ import org.apache.paimon.options.Options; import org.apache.paimon.schema.SchemaManager; import org.apache.paimon.stats.StatsTestUtils; import org.apache.paimon.types.RowType; +import org.apache.paimon.utils.FieldStatsCollectorUtils; import org.apache.paimon.utils.FileStorePathFactory; -import org.apache.paimon.utils.StatsUtils; import java.util.ArrayList; import java.util.Arrays; @@ -137,7 +137,7 @@ public abstract class ManifestFileMetaTestBase { CoreOptions.FILE_FORMAT.defaultValue().toString()), Long.MAX_VALUE, null, - StatsUtils.getFieldsStatsMode( + FieldStatsCollectorUtils.getFieldsStatsMode( new CoreOptions(new HashMap<>()), getPartitionType().getFieldNames())) .create(); diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java index a664fb2d3..5989234b8 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileTest.java @@ -28,8 +28,8 @@ import org.apache.paimon.options.Options; import org.apache.paimon.schema.SchemaManager; import org.apache.paimon.stats.StatsTestUtils; import org.apache.paimon.utils.FailingFileIO; +import org.apache.paimon.utils.FieldStatsCollectorUtils; import org.apache.paimon.utils.FileStorePathFactory; -import org.apache.paimon.utils.StatsUtils; import org.junit.jupiter.api.RepeatedTest; import org.junit.jupiter.api.io.TempDir; @@ -111,7 +111,7 @@ public class ManifestFileTest { pathFactory, suggestedFileSize, null, - StatsUtils.getFieldsStatsMode( + FieldStatsCollectorUtils.getFieldsStatsMode( new CoreOptions(new HashMap<>()), DEFAULT_PART_TYPE.getFieldNames())) .create(); diff --git a/paimon-core/src/test/java/org/apache/paimon/utils/FieldStatsCollectorUtilsTest.java b/paimon-core/src/test/java/org/apache/paimon/utils/FieldStatsCollectorUtilsTest.java index 74ad41da2..e69b38bae 100644 --- a/paimon-core/src/test/java/org/apache/paimon/utils/FieldStatsCollectorUtilsTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/utils/FieldStatsCollectorUtilsTest.java @@ -36,7 +36,7 @@ import org.junit.jupiter.api.Test; import java.util.Arrays; -/** Test for {@link StatsUtils}. */ +/** Test for {@link FieldStatsCollectorUtils}. */ public class FieldStatsCollectorUtilsTest { @Test public void testFieldStats() { @@ -53,7 +53,8 @@ public class FieldStatsCollectorUtilsTest { options.set(CoreOptions.FIELDS_PREFIX + ".c." + CoreOptions.STATS_MODE_SUFFIX, "full"); FieldStatsCollector[] stats = - StatsUtils.getFieldsStatsMode(new CoreOptions(options), type.getFieldNames()); + FieldStatsCollectorUtils.getFieldsStatsMode( + new CoreOptions(options), type.getFieldNames()); Assertions.assertEquals(3, stats.length); Assertions.assertEquals(16, ((TruncateFieldStatsCollector) stats[0]).getLength()); Assertions.assertEquals(12, ((TruncateFieldStatsCollector) stats[1]).getLength());
