This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
     new 23c788d0c  PARQUET-2463: Bump japicmp to 0.21.0 (#1329)
23c788d0c is described below

commit 23c788d0c48ea3520f9402fbb78fc78a3a9390d8
Author: Gang Wu <ust...@gmail.com>
AuthorDate: Fri Apr 26 17:28:29 2024 +0800

    PARQUET-2463: Bump japicmp to 0.21.0 (#1329)
---
 .../org/apache/parquet/avro/AvroParquetReader.java |  4 +-
 .../java/org/apache/parquet/io/api/Binary.java     |  4 +-
 .../org/apache/parquet/hadoop/CodecFactory.java    | 21 +++++---
 .../parquet/hadoop/ColumnChunkPageWriteStore.java  | 44 +++++++++++++++++
 .../apache/parquet/hadoop/DirectCodecFactory.java  |  4 +-
 .../org/apache/parquet/hadoop/ParquetReader.java   | 37 ++++++++------
 .../apache/parquet/hadoop/ParquetRecordWriter.java | 57 ++++++++++++++++++++++
 .../apache/parquet/thrift/struct/ThriftType.java   |  8 ++-
 pom.xml                                            | 33 ++-----------
 9 files changed, 154 insertions(+), 58 deletions(-)

diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java
index aefbf7b8b..805d7579c 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java
@@ -192,9 +192,9 @@ public class AvroParquetReader<T> extends ParquetReader<T> {
     @Override
     protected ReadSupport<T> getReadSupport() {
       if (isReflect) {
-        conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
+        configuration.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
       } else {
-        conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, enableCompatibility);
+        configuration.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, enableCompatibility);
       }
       return new AvroReadSupport<T>(model);
     }
diff --git a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
index d9a172cec..e37ee1248 100644
--- a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
+++ b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
@@ -86,7 +86,9 @@ public abstract class Binary implements Comparable<Binary>, Serializable {

   public abstract ByteBuffer toByteBuffer();

-  public abstract short get2BytesLittleEndian();
+  public short get2BytesLittleEndian() {
+    throw new UnsupportedOperationException("Not implemented");
+  }

   @Override
   public boolean equals(Object obj) {
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/CodecFactory.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/CodecFactory.java
index f0775484c..f1041a83b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/CodecFactory.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/CodecFactory.java
@@ -51,9 +51,13 @@ public class CodecFactory implements CompressionCodecFactory {
   private final Map<CompressionCodecName, BytesCompressor> compressors = new HashMap<>();
   private final Map<CompressionCodecName, BytesDecompressor> decompressors = new HashMap<>();

-  protected final ParquetConfiguration configuration;
+  protected final ParquetConfiguration conf;
   protected final int pageSize;

+  // May be null if parquetConfiguration is not an instance of org.apache.parquet.conf.HadoopParquetConfiguration
+  @Deprecated
+  protected final Configuration configuration;
+
   static final BytesDecompressor NO_OP_DECOMPRESSOR = new BytesDecompressor() {
     @Override
     public void decompress(ByteBuffer input, int compressedSize, ByteBuffer output, int decompressedSize) {
@@ -115,7 +119,12 @@ public class CodecFactory implements CompressionCodecFactory {
    *                 decompressors this parameter has no impact on the function of the factory
    */
   public CodecFactory(ParquetConfiguration configuration, int pageSize) {
-    this.configuration = configuration;
+    if (configuration instanceof HadoopParquetConfiguration) {
+      this.configuration = ((HadoopParquetConfiguration) configuration).getConfiguration();
+    } else {
+      this.configuration = null;
+    }
+    this.conf = configuration;
     this.pageSize = pageSize;
   }

@@ -293,7 +302,7 @@ public class CodecFactory implements CompressionCodecFactory {
         codecClass = new Configuration(false).getClassLoader().loadClass(codecClassName);
       }
       codec = (CompressionCodec)
-          ReflectionUtils.newInstance(codecClass, ConfigurationUtil.createHadoopConfiguration(configuration));
+          ReflectionUtils.newInstance(codecClass, ConfigurationUtil.createHadoopConfiguration(conf));
       CODEC_BY_NAME.put(codecCacheKey, codec);
       return codec;
     } catch (ClassNotFoundException e) {
@@ -305,13 +314,13 @@ public class CodecFactory implements CompressionCodecFactory {
     String level = null;
     switch (codecName) {
       case GZIP:
-        level = configuration.get("zlib.compress.level");
+        level = conf.get("zlib.compress.level");
         break;
       case BROTLI:
-        level = configuration.get("compression.brotli.quality");
+        level = conf.get("compression.brotli.quality");
        break;
       case ZSTD:
-        level = configuration.get("parquet.compression.codec.zstd.level");
+        level = conf.get("parquet.compression.codec.zstd.level");
         break;
       default:
         // compression level is not supported; ignore it
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
index 5599d2509..795063e5c 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
@@ -50,6 +50,7 @@ import org.apache.parquet.crypto.InternalFileEncryptor;
 import org.apache.parquet.crypto.ModuleCipherFactory.ModuleType;
 import org.apache.parquet.format.BlockCipher;
 import org.apache.parquet.format.converter.ParquetMetadataConverter;
+import org.apache.parquet.hadoop.CodecFactory.BytesCompressor;
 import org.apache.parquet.hadoop.metadata.ColumnPath;
 import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder;
 import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
@@ -513,6 +514,20 @@ public class ColumnChunkPageWriteStore implements PageWriteStore, BloomFilterWri
       new HashMap<ColumnDescriptor, ColumnChunkPageWriter>();
   private final MessageType schema;

+  @Deprecated
+  public ColumnChunkPageWriteStore(
+      BytesCompressor compressor,
+      MessageType schema,
+      ByteBufferAllocator allocator,
+      int columnIndexTruncateLength) {
+    this(
+        (BytesInputCompressor) compressor,
+        schema,
+        allocator,
+        columnIndexTruncateLength,
+        ParquetProperties.DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED);
+  }
+
   public ColumnChunkPageWriteStore(
       BytesInputCompressor compressor,
       MessageType schema,
@@ -526,6 +541,16 @@ public class ColumnChunkPageWriteStore implements PageWriteStore, BloomFilterWri
         ParquetProperties.DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED);
   }

+  @Deprecated
+  public ColumnChunkPageWriteStore(
+      BytesCompressor compressor,
+      MessageType schema,
+      ByteBufferAllocator allocator,
+      int columnIndexTruncateLength,
+      boolean pageWriteChecksumEnabled) {
+    this((BytesInputCompressor) compressor, schema, allocator, columnIndexTruncateLength, pageWriteChecksumEnabled);
+  }
+
   public ColumnChunkPageWriteStore(
       BytesInputCompressor compressor,
       MessageType schema,
@@ -550,6 +575,25 @@ public class ColumnChunkPageWriteStore implements PageWriteStore, BloomFilterWri
     }
   }

+  @Deprecated
+  public ColumnChunkPageWriteStore(
+      BytesCompressor compressor,
+      MessageType schema,
+      ByteBufferAllocator allocator,
+      int columnIndexTruncateLength,
+      boolean pageWriteChecksumEnabled,
+      InternalFileEncryptor fileEncryptor,
+      int rowGroupOrdinal) {
+    this(
+        (BytesInputCompressor) compressor,
+        schema,
+        allocator,
+        columnIndexTruncateLength,
+        pageWriteChecksumEnabled,
+        fileEncryptor,
+        rowGroupOrdinal);
+  }
+
   public ColumnChunkPageWriteStore(
       BytesInputCompressor compressor,
       MessageType schema,
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/DirectCodecFactory.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/DirectCodecFactory.java
index 523e57dbf..b2b5233ee 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/DirectCodecFactory.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/DirectCodecFactory.java
@@ -410,9 +410,9 @@ class DirectCodecFactory extends CodecFactory implements AutoCloseable {

     ZstdCompressor() {
       context = new ZstdCompressCtx();
-      context.setLevel(configuration.getInt(
+      context.setLevel(conf.getInt(
           ZstandardCodec.PARQUET_COMPRESS_ZSTD_LEVEL, ZstandardCodec.DEFAULT_PARQUET_COMPRESS_ZSTD_LEVEL));
-      context.setWorkers(configuration.getInt(
+      context.setWorkers(conf.getInt(
           ZstandardCodec.PARQUET_COMPRESS_ZSTD_WORKERS, ZstandardCodec.DEFAULTPARQUET_COMPRESS_ZSTD_WORKERS));
     }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
index 47647f10a..9ca1202eb 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
@@ -196,17 +196,21 @@ public class ParquetReader<T> implements Closeable {
     private final Path path;
     private Filter filter = null;
     private ByteBufferAllocator allocator = new HeapByteBufferAllocator();
-    protected ParquetConfiguration conf;
+    protected ParquetConfiguration configuration;
     private ParquetReadOptions.Builder optionsBuilder;

+    // May be null if parquetConfiguration is not an instance of org.apache.parquet.conf.HadoopParquetConfiguration
+    @Deprecated
+    protected Configuration conf;
+
     @Deprecated
     private Builder(ReadSupport<T> readSupport, Path path) {
       this.readSupport = Objects.requireNonNull(readSupport, "readSupport cannot be null");
       this.file = null;
       this.path = Objects.requireNonNull(path, "path cannot be null");
-      Configuration hadoopConf = new Configuration();
-      this.conf = new HadoopParquetConfiguration(hadoopConf);
-      this.optionsBuilder = HadoopReadOptions.builder(hadoopConf, path);
+      this.conf = new Configuration();
+      this.configuration = new HadoopParquetConfiguration(this.conf);
+      this.optionsBuilder = HadoopReadOptions.builder(this.conf, path);
     }

     @Deprecated
@@ -214,9 +218,9 @@ public class ParquetReader<T> implements Closeable {
       this.readSupport = null;
       this.file = null;
       this.path = Objects.requireNonNull(path, "path cannot be null");
-      Configuration hadoopConf = new Configuration();
-      this.conf = new HadoopParquetConfiguration(hadoopConf);
-      this.optionsBuilder = HadoopReadOptions.builder(hadoopConf, path);
+      this.conf = new Configuration();
+      this.configuration = new HadoopParquetConfiguration(this.conf);
+      this.optionsBuilder = HadoopReadOptions.builder(this.conf, path);
     }

     protected Builder(InputFile file) {
@@ -225,9 +229,9 @@ public class ParquetReader<T> implements Closeable {
       this.path = null;
       if (file instanceof HadoopInputFile) {
         HadoopInputFile hadoopFile = (HadoopInputFile) file;
-        Configuration hadoopConf = hadoopFile.getConfiguration();
-        this.conf = new HadoopParquetConfiguration(hadoopConf);
-        optionsBuilder = HadoopReadOptions.builder(hadoopConf, hadoopFile.getPath());
+        this.conf = hadoopFile.getConfiguration();
+        this.configuration = new HadoopParquetConfiguration(this.conf);
+        optionsBuilder = HadoopReadOptions.builder(this.conf, hadoopFile.getPath());
       } else {
         optionsBuilder = ParquetReadOptions.builder(new HadoopParquetConfiguration());
       }
@@ -237,11 +241,11 @@ public class ParquetReader<T> implements Closeable {
       this.readSupport = null;
       this.file = Objects.requireNonNull(file, "file cannot be null");
       this.path = null;
-      this.conf = conf;
+      this.configuration = conf;
       if (file instanceof HadoopInputFile) {
+        this.conf = ConfigurationUtil.createHadoopConfiguration(conf);
         HadoopInputFile hadoopFile = (HadoopInputFile) file;
-        optionsBuilder = HadoopReadOptions.builder(
-            ConfigurationUtil.createHadoopConfiguration(conf), hadoopFile.getPath());
+        optionsBuilder = HadoopReadOptions.builder(this.conf, hadoopFile.getPath());
       } else {
         optionsBuilder = ParquetReadOptions.builder(conf);
       }
@@ -249,7 +253,8 @@ public class ParquetReader<T> implements Closeable {

     // when called, resets options to the defaults from conf
     public Builder<T> withConf(Configuration conf) {
-      this.conf = new HadoopParquetConfiguration(Objects.requireNonNull(conf, "conf cannot be null"));
+      this.conf = Objects.requireNonNull(conf, "conf cannot be null");
+      this.configuration = new HadoopParquetConfiguration(this.conf);

       // previous versions didn't use the builder, so may set filter before conf. this maintains
       // compatibility for filter. other options are reset by a new conf.
@@ -262,7 +267,7 @@ public class ParquetReader<T> implements Closeable {
     }

     public Builder<T> withConf(ParquetConfiguration conf) {
-      this.conf = conf;
+      this.configuration = conf;
       this.optionsBuilder = ParquetReadOptions.builder(conf);
       if (filter != null) {
         optionsBuilder.withRecordFilter(filter);
@@ -383,7 +388,7 @@ public class ParquetReader<T> implements Closeable {
       ParquetReadOptions options = optionsBuilder.withAllocator(allocator).build();

       if (path != null) {
-        Configuration hadoopConf = ConfigurationUtil.createHadoopConfiguration(conf);
+        Configuration hadoopConf = ConfigurationUtil.createHadoopConfiguration(configuration);
         FileSystem fs = path.getFileSystem(hadoopConf);
         FileStatus stat = fs.getFileStatus(path);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordWriter.java
index 0e2f49eae..51528b10b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordWriter.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.parquet.column.ParquetProperties;
 import org.apache.parquet.column.ParquetProperties.WriterVersion;
 import org.apache.parquet.compression.CompressionCodecFactory.BytesInputCompressor;
+import org.apache.parquet.hadoop.CodecFactory.BytesCompressor;
 import org.apache.parquet.hadoop.api.WriteSupport;
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.schema.MessageType;
@@ -43,6 +44,33 @@ public class ParquetRecordWriter<T> extends RecordWriter<Void, T> {
   private final MemoryManager memoryManager;
   private final CodecFactory codecFactory;

+  @Deprecated
+  public ParquetRecordWriter(
+      ParquetFileWriter w,
+      WriteSupport<T> writeSupport,
+      MessageType schema,
+      Map<String, String> extraMetaData,
+      int blockSize,
+      int pageSize,
+      BytesCompressor compressor,
+      int dictionaryPageSize,
+      boolean enableDictionary,
+      boolean validating,
+      WriterVersion writerVersion) {
+    this(
+        w,
+        writeSupport,
+        schema,
+        extraMetaData,
+        blockSize,
+        pageSize,
+        (BytesInputCompressor) compressor,
+        dictionaryPageSize,
+        enableDictionary,
+        validating,
+        writerVersion);
+  }
+
   /**
    * @param w the file to write to
    * @param writeSupport the class to convert incoming records
@@ -81,6 +109,35 @@ public class ParquetRecordWriter<T> extends RecordWriter<Void, T> {
     this.codecFactory = null;
   }

+  @Deprecated
+  public ParquetRecordWriter(
+      ParquetFileWriter w,
+      WriteSupport<T> writeSupport,
+      MessageType schema,
+      Map<String, String> extraMetaData,
+      long blockSize,
+      int pageSize,
+      BytesCompressor compressor,
+      int dictionaryPageSize,
+      boolean enableDictionary,
+      boolean validating,
+      WriterVersion writerVersion,
+      MemoryManager memoryManager) {
+    this(
+        w,
+        writeSupport,
+        schema,
+        extraMetaData,
+        blockSize,
+        pageSize,
+        (BytesInputCompressor) compressor,
+        dictionaryPageSize,
+        enableDictionary,
+        validating,
+        writerVersion,
+        memoryManager);
+  }
+
   /**
    * @param w the file to write to
    * @param writeSupport the class to convert incoming records
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
index 3988c03f2..264790333 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
@@ -134,7 +134,9 @@ public abstract class ThriftType {

     R visit(StringType stringType, S state);

-    R visit(UUIDType uuidType, S state);
+    default R visit(UUIDType uuidType, S state) {
+      throw new UnsupportedOperationException("Not implemented");
+    }
   }

   /**
@@ -166,7 +168,9 @@ public abstract class ThriftType {

     void visit(StringType stringType);

-    void visit(UUIDType uuidType);
+    default void visit(UUIDType uuidType) {
+      throw new UnsupportedOperationException("Not implemented");
+    }
   }

   /**
diff --git a/pom.xml b/pom.xml
index a5625da6e..a81ae1630 100644
--- a/pom.xml
+++ b/pom.xml
@@ -69,13 +69,13 @@
     <jackson.package>com.fasterxml.jackson</jackson.package>
     <jackson.version>2.17.0</jackson.version>
     <jackson-databind.version>2.17.0</jackson-databind.version>
-    <japicmp.version>0.18.2</japicmp.version>
+    <japicmp.version>0.21.0</japicmp.version>
     <javax.annotation.version>1.3.2</javax.annotation.version>
     <spotless.version>2.30.0</spotless.version>
     <shade.prefix>shaded.parquet</shade.prefix>
     <hadoop.version>3.3.6</hadoop.version>
     <parquet.format.version>2.10.0</parquet.format.version>
-    <previous.version>1.13.0</previous.version>
+    <previous.version>1.13.1</previous.version>
     <thrift.executable>thrift</thrift.executable>
     <format.thrift.executable>${thrift.executable}</format.thrift.executable>
     <scala.version>2.12.17</scala.version>
@@ -577,39 +577,14 @@
                 </excludeModules>
                 <excludes>
                   <exclude>${shade.prefix}</exclude>
-                  <exclude>org.apache.parquet.hadoop.CodecFactory</exclude> <!-- change field type from Configuration to ParquetConfiguration -->
-                  <exclude>org.apache.parquet.hadoop.ParquetReader</exclude> <!-- change field type from Configuration to ParquetConfiguration -->
-                  <exclude>org.apache.parquet.thrift.projection.deprecated.PathGlobPattern</exclude>
-                  <!-- japicmp is overly aggressive on interface types in signatures, a type was changed to a supertype but this still triggers it -->
-                  <exclude>org.apache.parquet.hadoop.ColumnChunkPageWriteStore</exclude>
-                  <exclude>org.apache.parquet.hadoop.ParquetRecordWriter</exclude>
-                  <!-- likely japicmp bug, triggers on new interface methods after updating to 0.18.1 -->
-                  <exclude>org.apache.parquet.conf.PlainParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class)</exclude>
-                  <exclude>org.apache.parquet.conf.ParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class)</exclude>
-                  <exclude>org.apache.parquet.hadoop.util.SerializationUtil#readObjectFromConfAsBase64(java.lang.String,org.apache.parquet.conf.ParquetConfiguration)</exclude>
-                  <!-- The two lines below are not part of the API but japicmp reports errors unless excluded -->
-                  <exclude>org.apache.parquet.hadoop.util.wrapped.io.FutureIO#awaitFuture(java.util.concurrent.Future,long,java.util.concurrent.TimeUnit)</exclude>
-                  <exclude>org.apache.parquet.hadoop.util.wrapped.io.FutureIO#raiseInnerCause(java.util.concurrent.ExecutionException)</exclude>
-                  <exclude>org.apache.parquet.conf.HadoopParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class)</exclude>
-                  <exclude>org.apache.parquet.avro.AvroParquetReader#builder(org.apache.parquet.io.InputFile,org.apache.parquet.conf.ParquetConfiguration)</exclude>
-                  <exclude>org.apache.parquet.hadoop.thrift.TBaseWriteSupport#setThriftClass(org.apache.parquet.conf.ParquetConfiguration,java.lang.Class)</exclude>
-                  <exclude>org.apache.parquet.proto.ProtoParquetReader#builder(org.apache.hadoop.fs.Path,boolean)</exclude>
-                  <exclude>org.apache.parquet.proto.ProtoParquetReader#builder(org.apache.parquet.io.InputFile,boolean)</exclude>
-                  <!-- removal of a protected method in a class that's not supposed to be subclassed by third-party code -->
+                  <!-- Removal of a protected method in a class that's not supposed to be subclassed by third-party code -->
                   <exclude>org.apache.parquet.column.values.bytestreamsplit.ByteStreamSplitValuesReader#gatherElementDataFromStreams(byte[])</exclude>
-                  <!-- Due to the removal of deprecated methods -->
-                  <exclude>org.apache.parquet.arrow.schema.SchemaMapping</exclude>
-
+                  <exclude>org.apache.parquet.arrow.schema.SchemaMapping$TypeMappingVisitor#visit(org.apache.parquet.arrow.schema.SchemaMapping$MapTypeMapping)</exclude>
                   <!-- Make static variables final -->
                   <exclude>org.apache.parquet.avro.AvroReadSupport#AVRO_REQUESTED_PROJECTION</exclude>
                   <exclude>org.apache.parquet.avro.AvroReadSupport#AVRO_DATA_SUPPLIER</exclude>
                   <exclude>org.apache.parquet.hadoop.ParquetFileReader#PARQUET_READ_PARALLELISM</exclude>
-
-                  <exclude>org.apache.parquet.thrift.struct.ThriftType</exclude>
-
-                  <exclude>org.apache.parquet.io.api.Binary#get2BytesLittleEndian()</exclude>
-                  <exclude>org.apache.parquet.schema.LogicalTypeAnnotation$Float16LogicalTypeAnnotation#accept(org.apache.parquet.schema.LogicalTypeAnnotation$LogicalTypeAnnotationVisitor)</exclude>
                 </excludes>
              </parameter>
            </configuration>
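
Background on the change: the ThriftType and Binary edits above restore the 1.13.x binary contract by turning abstract members into concrete ones that throw UnsupportedOperationException (default methods on the visitor interfaces, a plain method on the abstract class), which is why the blanket japicmp excludes could be dropped. A minimal sketch of the idea, using hypothetical Shape names rather than the real Parquet types:

    // Hypothetical visitor, not from this patch. Adding an abstract method to
    // a published interface breaks every existing implementor; a throwing
    // default keeps old binaries linking and only fails if the new case is
    // actually reached at runtime.
    interface ShapeVisitor<R> {
      R visitCircle(double radius);

      // Added in a later release; implementors compiled against the previous
      // release still load and link, and may override this when they upgrade.
      default R visitSquare(double side) {
        throw new UnsupportedOperationException("Not implemented");
      }
    }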
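ColumnChunkPageWriteStore and ParquetRecordWriter take a different route: the old constructor signatures taking the concrete CodecFactory.BytesCompressor are reinstated as deprecated bridges to the interface-typed overloads. The shape of that bridge, again with hypothetical names standing in for the real classes:

    // Hypothetical types, not the real Parquet classes. The deprecated
    // constructor keeps the old method descriptor in the bytecode and simply
    // widens its argument to the new interface-typed overload.
    interface Compressor {
      byte[] compress(byte[] input);
    }

    class LegacyCompressor implements Compressor {
      @Override
      public byte[] compress(byte[] input) {
        return input; // placeholder implementation
      }
    }

    class PageWriter {
      private final Compressor compressor;

      @Deprecated
      PageWriter(LegacyCompressor compressor) {
        this((Compressor) compressor); // bridge to the new overload
      }

      PageWriter(Compressor compressor) {
        this.compressor = compressor;
      }
    }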
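Finally, CodecFactory and ParquetReader.Builder reinstate the protected Hadoop Configuration field (deprecated, possibly null) alongside the newer ParquetConfiguration, populating it only when the abstraction actually wraps a Hadoop configuration. Roughly, under simplified hypothetical types:

    // Hypothetical config classes standing in for ParquetConfiguration,
    // HadoopParquetConfiguration, and Hadoop's Configuration.
    interface ParquetConfig {}

    final class HadoopConfig {}

    final class HadoopParquetConfig implements ParquetConfig {
      private final HadoopConfig delegate = new HadoopConfig();

      HadoopConfig getConfiguration() {
        return delegate;
      }
    }

    class Factory {
      protected final ParquetConfig conf; // always set, used internally

      @Deprecated
      protected final HadoopConfig configuration; // kept for subclasses; may be null

      Factory(ParquetConfig conf) {
        this.conf = conf;
        this.configuration = (conf instanceof HadoopParquetConfig)
            ? ((HadoopParquetConfig) conf).getConfiguration()
            : null;
      }
    }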