This is an automated email from the ASF dual-hosted git repository. gangwu pushed a commit to branch parquet-1.14.x in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/parquet-1.14.x by this push: new 9c8fde0c7 PARQUET-2468: ParquetMetadata must convert to json (#1349) (#1360) 9c8fde0c7 is described below commit 9c8fde0c7a8bacaba28c9e0a953aba0a83b25fdd Author: Gang Wu <ust...@gmail.com> AuthorDate: Fri May 31 09:39:23 2024 +0800 PARQUET-2468: ParquetMetadata must convert to json (#1349) (#1360) Co-authored-by: Michel Davit <mic...@davit.fr> --- parquet-hadoop/pom.xml | 10 ++++++ .../hadoop/metadata/ColumnChunkMetaData.java | 4 +++ .../hadoop/metadata/ColumnChunkProperties.java | 2 ++ .../parquet/hadoop/metadata/FileMetaData.java | 2 ++ .../parquet/hadoop/metadata/ParquetMetadata.java | 35 ++++++++++++------ .../converter/TestParquetMetadataConverter.java | 41 ++++++++++++++++++++-- parquet-jackson/pom.xml | 18 ++++++++++ pom.xml | 1 + 8 files changed, 100 insertions(+), 13 deletions(-) diff --git a/parquet-hadoop/pom.xml b/parquet-hadoop/pom.xml index 7d02ac54a..465d7c95f 100644 --- a/parquet-hadoop/pom.xml +++ b/parquet-hadoop/pom.xml @@ -118,11 +118,21 @@ <artifactId>jackson-core</artifactId> <version>${jackson.version}</version> </dependency> + <dependency> + <groupId>${jackson.groupId}</groupId> + <artifactId>jackson-annotations</artifactId> + <version>${jackson.version}</version> + </dependency> <dependency> <groupId>${jackson.groupId}</groupId> <artifactId>jackson-databind</artifactId> <version>${jackson-databind.version}</version> </dependency> + <dependency> + <groupId>${jackson.datatype.groupId}</groupId> + <artifactId>jackson-datatype-jdk8</artifactId> + <version>${jackson-modules-java8.version}</version> + </dependency> <dependency> <groupId>org.xerial.snappy</groupId> <artifactId>snappy-java</artifactId> diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java index 3dac15ba7..14a949b0e 100644 --- 
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java @@ -22,6 +22,7 @@ import static org.apache.parquet.column.Encoding.PLAIN_DICTIONARY; import static org.apache.parquet.column.Encoding.RLE_DICTIONARY; import static org.apache.parquet.format.Util.readColumnMetaData; +import com.fasterxml.jackson.annotation.JsonIgnore; import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.Set; @@ -338,6 +339,7 @@ public abstract class ColumnChunkMetaData { * @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} instead. */ @Deprecated + @JsonIgnore public PrimitiveTypeName getType() { decryptIfNeeded(); return properties.getType(); @@ -380,6 +382,7 @@ public abstract class ColumnChunkMetaData { /** * @return the stats for this column */ + @JsonIgnore public abstract Statistics getStatistics(); /** @@ -387,6 +390,7 @@ public abstract class ColumnChunkMetaData { * * @return the size stats for this column */ + @JsonIgnore public SizeStatistics getSizeStatistics() { throw new UnsupportedOperationException("SizeStatistics is not implemented"); } diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java index 3b0a33b14..026e37a1c 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java @@ -18,6 +18,7 @@ */ package org.apache.parquet.hadoop.metadata; +import com.fasterxml.jackson.annotation.JsonIgnore; import java.util.Arrays; import java.util.Set; import org.apache.parquet.column.Encoding; @@ -76,6 +77,7 @@ public class ColumnChunkProperties { * @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} instead. 
*/ @Deprecated + @JsonIgnore public PrimitiveTypeName getType() { return type.getPrimitiveTypeName(); } diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java index c608cd405..4143dd805 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java @@ -20,6 +20,7 @@ package org.apache.parquet.hadoop.metadata; import static java.util.Collections.unmodifiableMap; +import com.fasterxml.jackson.annotation.JsonIgnore; import java.io.Serializable; import java.util.Map; import java.util.Objects; @@ -109,6 +110,7 @@ public final class FileMetaData implements Serializable { return createdBy; } + @JsonIgnore public InternalFileDecryptor getFileDecryptor() { return fileDecryptor; } diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java index e30e872a6..640ecfba1 100755 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java @@ -19,6 +19,9 @@ package org.apache.parquet.hadoop.metadata; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.datatype.jdk8.Jdk8Module; import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; @@ -32,6 +35,14 @@ public class ParquetMetadata { private static final ObjectMapper objectMapper = new ObjectMapper(); + static { + // Disable FAIL_ON_EMPTY_BEANS on objectmapper.
Without this change parquet-cascading tests fail, + // because LogicalTypeAnnotation implementations are classes without any property. + objectMapper.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS); + // Add support for Java 8 Optional + objectMapper.registerModule(new Jdk8Module()); + } + /** * @param parquetMetaData an instance of parquet metadata to convert * @return the json representation @@ -50,19 +61,23 @@ public class ParquetMetadata { private static String toJSON(ParquetMetadata parquetMetaData, boolean isPrettyPrint) { try (StringWriter stringWriter = new StringWriter()) { + Object objectToPrint; + if (parquetMetaData.getFileMetaData() == null + || parquetMetaData.getFileMetaData().getEncryptionType() + == FileMetaData.EncryptionType.UNENCRYPTED) { + objectToPrint = parquetMetaData; + } else { + objectToPrint = parquetMetaData.getFileMetaData(); + } + + ObjectWriter writer; if (isPrettyPrint) { - Object objectToPrint; - if (parquetMetaData.getFileMetaData() == null - || parquetMetaData.getFileMetaData().getEncryptionType() - == FileMetaData.EncryptionType.UNENCRYPTED) { - objectToPrint = parquetMetaData; - } else { - objectToPrint = parquetMetaData.getFileMetaData(); - } - objectMapper.writerWithDefaultPrettyPrinter().writeValue(stringWriter, objectToPrint); + writer = objectMapper.writerWithDefaultPrettyPrinter(); } else { - objectMapper.writeValue(stringWriter, parquetMetaData); + writer = objectMapper.writer(); } + + writer.writeValue(stringWriter, objectToPrint); return stringWriter.toString(); } catch (IOException e) { throw new RuntimeException(e); diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index 4dcede624..2cffb5186 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -87,6 +87,10 @@ import org.apache.parquet.column.statistics.IntStatistics; import org.apache.parquet.column.statistics.LongStatistics; import org.apache.parquet.column.statistics.SizeStatistics; import org.apache.parquet.column.statistics.Statistics; +import org.apache.parquet.crypto.DecryptionPropertiesFactory; +import org.apache.parquet.crypto.EncryptionPropertiesFactory; +import org.apache.parquet.crypto.FileDecryptionProperties; +import org.apache.parquet.crypto.InternalFileDecryptor; import org.apache.parquet.example.Paper; import org.apache.parquet.example.data.Group; import org.apache.parquet.example.data.simple.SimpleGroup; @@ -635,11 +639,16 @@ public class TestParquetMetadataConverter { } @Test - public void testNullFieldMetadataDebugLogging() { + public void testFieldMetadataDebugLogging() { MessageType schema = parseMessageType("message test { optional binary some_null_field; }"); org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData = - new org.apache.parquet.hadoop.metadata.FileMetaData(schema, new HashMap<String, String>(), null); - List<BlockMetaData> blockMetaDataList = new ArrayList<BlockMetaData>(); + new org.apache.parquet.hadoop.metadata.FileMetaData( + schema, + new HashMap<>(), + null, + org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType.UNENCRYPTED, + null); + List<BlockMetaData> blockMetaDataList = new ArrayList<>(); BlockMetaData blockMetaData = new BlockMetaData(); blockMetaData.addColumn(createColumnChunkMetaData()); blockMetaDataList.add(blockMetaData); @@ -647,6 +656,32 @@ public class TestParquetMetadataConverter { ParquetMetadata.toJSON(metadata); } + @Test + public void testEncryptedFieldMetadataDebugLogging() { + Configuration conf = new Configuration(); + conf.set( + EncryptionPropertiesFactory.CRYPTO_FACTORY_CLASS_PROPERTY_NAME, + "org.apache.parquet.crypto.SampleDecryptionPropertiesFactory"); + 
DecryptionPropertiesFactory decryptionPropertiesFactory = DecryptionPropertiesFactory.loadFactory(conf); + FileDecryptionProperties decryptionProperties = + decryptionPropertiesFactory.getFileDecryptionProperties(conf, null); + + MessageType schema = parseMessageType("message test { optional binary some_null_field; }"); + + org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData = + new org.apache.parquet.hadoop.metadata.FileMetaData( + schema, + new HashMap<>(), + null, + org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType.ENCRYPTED_FOOTER, + new InternalFileDecryptor(decryptionProperties)); + + List<BlockMetaData> blockMetaDataList = new ArrayList<>(); + ParquetMetadata metadata = new ParquetMetadata(fileMetaData, blockMetaDataList); + ParquetMetadata.toJSON(metadata); + System.out.println(ParquetMetadata.toPrettyJSON(metadata)); + } + @Test public void testMetadataToJson() { ParquetMetadata metadata = new ParquetMetadata(null, null); diff --git a/parquet-jackson/pom.xml b/parquet-jackson/pom.xml index 6bd860fd8..22453aae1 100644 --- a/parquet-jackson/pom.xml +++ b/parquet-jackson/pom.xml @@ -38,11 +38,22 @@ <artifactId>jackson-core</artifactId> <version>${jackson.version}</version> </dependency> + <dependency> + <groupId>${jackson.groupId}</groupId> + <artifactId>jackson-annotations</artifactId> + <version>${jackson.version}</version> + </dependency> <dependency> <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-databind</artifactId> <version>${jackson-databind.version}</version> </dependency> + <!-- Add support for Java 8 Optional --> + <dependency> + <groupId>com.fasterxml.jackson.datatype</groupId> + <artifactId>jackson-datatype-jdk8</artifactId> + <version>${jackson-modules-java8.version}</version> + </dependency> </dependencies> <properties> @@ -70,6 +81,7 @@ <artifactSet> <includes> <include>${jackson.groupId}:*</include> + <include>${jackson.datatype.groupId}:*</include> </includes> </artifactSet> <filters> @@ -79,6 +91,12 
@@ <include>**</include> </includes> </filter> + <filter> + <artifact>${jackson.datatype.groupId}:*</artifact> + <includes> + <include>**</include> + </includes> + </filter> </filters> <relocations> <relocation> diff --git a/pom.xml b/pom.xml index 59ad34d94..73d89ebb5 100644 --- a/pom.xml +++ b/pom.xml @@ -68,6 +68,7 @@ <jackson.package>com.fasterxml.jackson</jackson.package> <jackson.version>2.17.0</jackson.version> <jackson-databind.version>2.17.0</jackson-databind.version> + <jackson-modules-java8.version>2.17.0</jackson-modules-java8.version> <japicmp.version>0.21.0</japicmp.version> <javax.annotation.version>1.3.2</javax.annotation.version> <spotless.version>2.30.0</spotless.version>