[ 
https://issues.apache.org/jira/browse/DRILL-7063?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16810105#comment-16810105
 ] 

ASF GitHub Bot commented on DRILL-7063:
---------------------------------------

amansinha100 commented on pull request #1723: DRILL-7063: Seperate metadata 
cache file into summary, file metadata
URL: https://github.com/apache/drill/pull/1723#discussion_r272281084
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata_V4.java
 ##########
 @@ -0,0 +1,672 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet.metadata;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonSerializer;
+import com.fasterxml.jackson.databind.KeyDeserializer;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.drill.common.expression.SchemaPath;
+
+import static 
org.apache.drill.exec.store.parquet.metadata.MetadataBase.ColumnMetadata;
+import static 
org.apache.drill.exec.store.parquet.metadata.MetadataBase.ParquetFileMetadata;
+import static 
org.apache.drill.exec.store.parquet.metadata.MetadataBase.ParquetTableMetadataBase;
+import static 
org.apache.drill.exec.store.parquet.metadata.MetadataBase.RowGroupMetadata;
+import static 
org.apache.drill.exec.store.parquet.metadata.MetadataBase.ColumnTypeMetadata;
+import static 
org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V4;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+
+public class Metadata_V4 {
+
+  public static class ParquetTableMetadata_v4 extends ParquetTableMetadataBase 
{
+
+    MetadataSummary metadataSummary = new MetadataSummary();
+    FileMetadata fileMetadata = new FileMetadata();
+
+    public ParquetTableMetadata_v4(MetadataSummary metadataSummary) {
+      this.metadataSummary = metadataSummary;
+    }
+
+    public ParquetTableMetadata_v4(MetadataSummary metadataSummary, 
FileMetadata fileMetadata) {
+      this.metadataSummary = metadataSummary;
+      this.fileMetadata = fileMetadata;
+    }
+
+    public ParquetTableMetadata_v4(String metadataVersion, 
ParquetTableMetadataBase parquetTableMetadata,
+                                   List<ParquetFileMetadata_v4> files, 
List<Path> directories, String drillVersion, long totalRowCount, boolean 
allColumnsInteresting) {
+      this.metadataSummary.metadataVersion = metadataVersion;
+      this.fileMetadata.files = files;
+      this.metadataSummary.directories = directories;
+      this.metadataSummary.columnTypeInfo = ((ParquetTableMetadata_v4) 
parquetTableMetadata).metadataSummary.columnTypeInfo;
+      this.metadataSummary.drillVersion = drillVersion;
+      this.metadataSummary.totalRowCount = totalRowCount;
+      this.metadataSummary.allColumnsInteresting = allColumnsInteresting;
+    }
+
+    public ColumnTypeMetadata_v4 getColumnTypeInfo(String[] name) {
+      return metadataSummary.getColumnTypeInfo(name);
+    }
+
+    @JsonIgnore
+    @Override
+    public List<Path> getDirectories() {
+      return metadataSummary.getDirectories();
+    }
+
+    @Override
+    public List<? extends ParquetFileMetadata> getFiles() {
+      return fileMetadata.getFiles();
+    }
+
+    @Override
+    public String getMetadataVersion() {
+      return metadataSummary.getMetadataVersion();
+    }
+
+    /**
+     * If directories list and file metadata list contain relative paths, 
update it to absolute ones
+     *
+     * @param baseDir base parent directory
+     */
+    public void updateRelativePaths(String baseDir) {
+      // update directories paths to absolute ones
+      this.metadataSummary.directories = 
MetadataPathUtils.convertToAbsolutePaths(metadataSummary.directories, baseDir);
+
+      // update files paths to absolute ones
+      this.fileMetadata.files = 
MetadataPathUtils.convertToFilesWithAbsolutePathsForV4(fileMetadata.files, 
baseDir);
+    }
+
+    @Override
+    public void assignFiles(List<? extends ParquetFileMetadata> newFiles) {
+      this.fileMetadata.assignFiles(newFiles);
+    }
+
+    @Override
+    public boolean hasColumnMetadata() {
+      return true;
+    }
+
+    @Override
+    public PrimitiveType.PrimitiveTypeName getPrimitiveType(String[] 
columnName) {
+      return getColumnTypeInfo(columnName).primitiveType;
+    }
+
+    @Override
+    public OriginalType getOriginalType(String[] columnName) {
+      return getColumnTypeInfo(columnName).originalType;
+    }
+
+    @Override
+    public Integer getRepetitionLevel(String[] columnName) {
+      return getColumnTypeInfo(columnName).repetitionLevel;
+    }
+
+    @Override
+    public Integer getDefinitionLevel(String[] columnName) {
+      return getColumnTypeInfo(columnName).definitionLevel;
+    }
+
+    @Override
+    public Integer getScale(String[] columnName) {
+      return getColumnTypeInfo(columnName).scale;
+    }
+
+    @Override
+    public Integer getPrecision(String[] columnName) {
+      return getColumnTypeInfo(columnName).precision;
+    }
+
+    @Override
+    public boolean isRowGroupPrunable() {
+      return true;
+    }
+
+    @Override
+    public ParquetTableMetadataBase clone() {
+      return new ParquetTableMetadata_v4(metadataSummary, fileMetadata);
+    }
+
+    @Override
+    public String getDrillVersion() {
+      return metadataSummary.drillVersion;
+    }
+
 
 Review comment:
   Code style: please remove the extra blank lines throughout this file.
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Create separate summary file for schema, totalRowCount, totalNullCount 
> (includes maintenance)
> ---------------------------------------------------------------------------------------------
>
>                 Key: DRILL-7063
>                 URL: https://issues.apache.org/jira/browse/DRILL-7063
>             Project: Apache Drill
>          Issue Type: Sub-task
>          Components: Metadata
>            Reporter: Venkata Jyothsna Donapati
>            Assignee: Venkata Jyothsna Donapati
>            Priority: Major
>             Fix For: 1.16.0
>
>   Original Estimate: 252h
>  Remaining Estimate: 252h
>




--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to