This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch parquet-1.14.x
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/parquet-1.14.x by this push:
     new 9c8fde0c7 PARQUET-2468: ParquetMetadata must convert to json (#1349) 
(#1360)
9c8fde0c7 is described below

commit 9c8fde0c7a8bacaba28c9e0a953aba0a83b25fdd
Author: Gang Wu <ust...@gmail.com>
AuthorDate: Fri May 31 09:39:23 2024 +0800

    PARQUET-2468: ParquetMetadata must convert to json (#1349) (#1360)
    
    Co-authored-by: Michel Davit <mic...@davit.fr>
---
 parquet-hadoop/pom.xml                             | 10 ++++++
 .../hadoop/metadata/ColumnChunkMetaData.java       |  4 +++
 .../hadoop/metadata/ColumnChunkProperties.java     |  2 ++
 .../parquet/hadoop/metadata/FileMetaData.java      |  2 ++
 .../parquet/hadoop/metadata/ParquetMetadata.java   | 35 ++++++++++++------
 .../converter/TestParquetMetadataConverter.java    | 41 ++++++++++++++++++++--
 parquet-jackson/pom.xml                            | 18 ++++++++++
 pom.xml                                            |  1 +
 8 files changed, 100 insertions(+), 13 deletions(-)

diff --git a/parquet-hadoop/pom.xml b/parquet-hadoop/pom.xml
index 7d02ac54a..465d7c95f 100644
--- a/parquet-hadoop/pom.xml
+++ b/parquet-hadoop/pom.xml
@@ -118,11 +118,21 @@
       <artifactId>jackson-core</artifactId>
       <version>${jackson.version}</version>
     </dependency>
+    <dependency>
+      <groupId>${jackson.groupId}</groupId>
+      <artifactId>jackson-annotations</artifactId>
+      <version>${jackson.version}</version>
+    </dependency>
     <dependency>
       <groupId>${jackson.groupId}</groupId>
       <artifactId>jackson-databind</artifactId>
       <version>${jackson-databind.version}</version>
     </dependency>
+    <dependency>
+      <groupId>${jackson.datatype.groupId}</groupId>
+      <artifactId>jackson-datatype-jdk8</artifactId>
+      <version>${jackson-modules-java8.version}</version>
+    </dependency>
     <dependency>
       <groupId>org.xerial.snappy</groupId>
       <artifactId>snappy-java</artifactId>
diff --git 
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
 
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
index 3dac15ba7..14a949b0e 100644
--- 
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
+++ 
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
@@ -22,6 +22,7 @@ import static 
org.apache.parquet.column.Encoding.PLAIN_DICTIONARY;
 import static org.apache.parquet.column.Encoding.RLE_DICTIONARY;
 import static org.apache.parquet.format.Util.readColumnMetaData;
 
+import com.fasterxml.jackson.annotation.JsonIgnore;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.util.Set;
@@ -338,6 +339,7 @@ public abstract class ColumnChunkMetaData {
    * @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} 
instead.
    */
   @Deprecated
+  @JsonIgnore
   public PrimitiveTypeName getType() {
     decryptIfNeeded();
     return properties.getType();
@@ -380,6 +382,7 @@ public abstract class ColumnChunkMetaData {
   /**
    * @return the stats for this column
    */
+  @JsonIgnore
   public abstract Statistics getStatistics();
 
   /**
@@ -387,6 +390,7 @@ public abstract class ColumnChunkMetaData {
    *
    * @return the size stats for this column
    */
+  @JsonIgnore
   public SizeStatistics getSizeStatistics() {
     throw new UnsupportedOperationException("SizeStatistics is not 
implemented");
   }
diff --git 
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
 
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
index 3b0a33b14..026e37a1c 100644
--- 
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
+++ 
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
@@ -18,6 +18,7 @@
  */
 package org.apache.parquet.hadoop.metadata;
 
+import com.fasterxml.jackson.annotation.JsonIgnore;
 import java.util.Arrays;
 import java.util.Set;
 import org.apache.parquet.column.Encoding;
@@ -76,6 +77,7 @@ public class ColumnChunkProperties {
    * @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} 
instead.
    */
   @Deprecated
+  @JsonIgnore
   public PrimitiveTypeName getType() {
     return type.getPrimitiveTypeName();
   }
diff --git 
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java
 
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java
index c608cd405..4143dd805 100644
--- 
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java
+++ 
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java
@@ -20,6 +20,7 @@ package org.apache.parquet.hadoop.metadata;
 
 import static java.util.Collections.unmodifiableMap;
 
+import com.fasterxml.jackson.annotation.JsonIgnore;
 import java.io.Serializable;
 import java.util.Map;
 import java.util.Objects;
@@ -109,6 +110,7 @@ public final class FileMetaData implements Serializable {
     return createdBy;
   }
 
+  @JsonIgnore
   public InternalFileDecryptor getFileDecryptor() {
     return fileDecryptor;
   }
diff --git 
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java
 
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java
index e30e872a6..640ecfba1 100755
--- 
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java
+++ 
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java
@@ -19,6 +19,9 @@
 package org.apache.parquet.hadoop.metadata;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
 import java.io.IOException;
 import java.io.StringReader;
 import java.io.StringWriter;
@@ -32,6 +35,14 @@ public class ParquetMetadata {
 
   private static final ObjectMapper objectMapper = new ObjectMapper();
 
+  static {
+    // Disable FAIL_ON_EMPTY_BEANS on objectmapper. Otherwise 
parquet-cascading tests fail,
+    // because LogicalTypeAnnotation implementations are classes without any 
property.
+    objectMapper.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS);
+    // Add support for Java 8 Optional
+    objectMapper.registerModule(new Jdk8Module());
+  }
+
   /**
    * @param parquetMetaData an instance of parquet metadata to convert
    * @return the json representation
@@ -50,19 +61,23 @@ public class ParquetMetadata {
 
   private static String toJSON(ParquetMetadata parquetMetaData, boolean 
isPrettyPrint) {
     try (StringWriter stringWriter = new StringWriter()) {
+      Object objectToPrint;
+      if (parquetMetaData.getFileMetaData() == null
+          || parquetMetaData.getFileMetaData().getEncryptionType()
+              == FileMetaData.EncryptionType.UNENCRYPTED) {
+        objectToPrint = parquetMetaData;
+      } else {
+        objectToPrint = parquetMetaData.getFileMetaData();
+      }
+
+      ObjectWriter writer;
       if (isPrettyPrint) {
-        Object objectToPrint;
-        if (parquetMetaData.getFileMetaData() == null
-            || parquetMetaData.getFileMetaData().getEncryptionType()
-                == FileMetaData.EncryptionType.UNENCRYPTED) {
-          objectToPrint = parquetMetaData;
-        } else {
-          objectToPrint = parquetMetaData.getFileMetaData();
-        }
-        objectMapper.writerWithDefaultPrettyPrinter().writeValue(stringWriter, 
objectToPrint);
+        writer = objectMapper.writerWithDefaultPrettyPrinter();
       } else {
-        objectMapper.writeValue(stringWriter, parquetMetaData);
+        writer = objectMapper.writer();
       }
+
+      writer.writeValue(stringWriter, objectToPrint);
       return stringWriter.toString();
     } catch (IOException e) {
       throw new RuntimeException(e);
diff --git 
a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
 
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 4dcede624..2cffb5186 100644
--- 
a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ 
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -87,6 +87,10 @@ import org.apache.parquet.column.statistics.IntStatistics;
 import org.apache.parquet.column.statistics.LongStatistics;
 import org.apache.parquet.column.statistics.SizeStatistics;
 import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.crypto.DecryptionPropertiesFactory;
+import org.apache.parquet.crypto.EncryptionPropertiesFactory;
+import org.apache.parquet.crypto.FileDecryptionProperties;
+import org.apache.parquet.crypto.InternalFileDecryptor;
 import org.apache.parquet.example.Paper;
 import org.apache.parquet.example.data.Group;
 import org.apache.parquet.example.data.simple.SimpleGroup;
@@ -635,11 +639,16 @@ public class TestParquetMetadataConverter {
   }
 
   @Test
-  public void testNullFieldMetadataDebugLogging() {
+  public void testFieldMetadataDebugLogging() {
     MessageType schema = parseMessageType("message test { optional binary 
some_null_field; }");
     org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
-        new org.apache.parquet.hadoop.metadata.FileMetaData(schema, new 
HashMap<String, String>(), null);
-    List<BlockMetaData> blockMetaDataList = new ArrayList<BlockMetaData>();
+        new org.apache.parquet.hadoop.metadata.FileMetaData(
+            schema,
+            new HashMap<>(),
+            null,
+            
org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType.UNENCRYPTED,
+            null);
+    List<BlockMetaData> blockMetaDataList = new ArrayList<>();
     BlockMetaData blockMetaData = new BlockMetaData();
     blockMetaData.addColumn(createColumnChunkMetaData());
     blockMetaDataList.add(blockMetaData);
@@ -647,6 +656,32 @@ public class TestParquetMetadataConverter {
     ParquetMetadata.toJSON(metadata);
   }
 
+  @Test
+  public void testEncryptedFieldMetadataDebugLogging() {
+    Configuration conf = new Configuration();
+    conf.set(
+        EncryptionPropertiesFactory.CRYPTO_FACTORY_CLASS_PROPERTY_NAME,
+        "org.apache.parquet.crypto.SampleDecryptionPropertiesFactory");
+    DecryptionPropertiesFactory decryptionPropertiesFactory = 
DecryptionPropertiesFactory.loadFactory(conf);
+    FileDecryptionProperties decryptionProperties =
+        decryptionPropertiesFactory.getFileDecryptionProperties(conf, null);
+
+    MessageType schema = parseMessageType("message test { optional binary 
some_null_field; }");
+
+    org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
+        new org.apache.parquet.hadoop.metadata.FileMetaData(
+            schema,
+            new HashMap<>(),
+            null,
+            
org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType.ENCRYPTED_FOOTER,
+            new InternalFileDecryptor(decryptionProperties));
+
+    List<BlockMetaData> blockMetaDataList = new ArrayList<>();
+    ParquetMetadata metadata = new ParquetMetadata(fileMetaData, 
blockMetaDataList);
+    ParquetMetadata.toJSON(metadata);
+    System.out.println(ParquetMetadata.toPrettyJSON(metadata));
+  }
+
   @Test
   public void testMetadataToJson() {
     ParquetMetadata metadata = new ParquetMetadata(null, null);
diff --git a/parquet-jackson/pom.xml b/parquet-jackson/pom.xml
index 6bd860fd8..22453aae1 100644
--- a/parquet-jackson/pom.xml
+++ b/parquet-jackson/pom.xml
@@ -38,11 +38,22 @@
       <artifactId>jackson-core</artifactId>
       <version>${jackson.version}</version>
     </dependency>
+    <dependency>
+      <groupId>${jackson.groupId}</groupId>
+      <artifactId>jackson-annotations</artifactId>
+      <version>${jackson.version}</version>
+    </dependency>
     <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-databind</artifactId>
       <version>${jackson-databind.version}</version>
     </dependency>
+    <!-- Add support for Java 8 Optional -->
+    <dependency>
+      <groupId>com.fasterxml.jackson.datatype</groupId>
+      <artifactId>jackson-datatype-jdk8</artifactId>
+      <version>${jackson-modules-java8.version}</version>
+    </dependency>
   </dependencies>
 
   <properties>
@@ -70,6 +81,7 @@
               <artifactSet>
                 <includes>
                   <include>${jackson.groupId}:*</include>
+                  <include>${jackson.datatype.groupId}:*</include>
                 </includes>
               </artifactSet>
               <filters>
@@ -79,6 +91,12 @@
                     <include>**</include>
                   </includes>
                 </filter>
+                <filter>
+                  <artifact>${jackson.datatype.groupId}:*</artifact>
+                  <includes>
+                    <include>**</include>
+                  </includes>
+                </filter>
               </filters>
               <relocations>
                 <relocation>
diff --git a/pom.xml b/pom.xml
index 59ad34d94..73d89ebb5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -68,6 +68,7 @@
     <jackson.package>com.fasterxml.jackson</jackson.package>
     <jackson.version>2.17.0</jackson.version>
     <jackson-databind.version>2.17.0</jackson-databind.version>
+    <jackson-modules-java8.version>2.17.0</jackson-modules-java8.version>
     <japicmp.version>0.21.0</japicmp.version>
     <javax.annotation.version>1.3.2</javax.annotation.version>
     <spotless.version>2.30.0</spotless.version>

Reply via email to