This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 5ad2c80  HIVE-25689: Remove deprecated DataUtil from iceberg-handler (Adam Szita, reviewed by Laszlo Pinter and Stamatis Zampetakis)
5ad2c80 is described below

commit 5ad2c8042ddc9fc1719c641e55a9f3d2faa9a6c4
Author: Adam Szita <40628386+sz...@users.noreply.github.com>
AuthorDate: Fri Nov 19 09:25:44 2021 +0100

    HIVE-25689: Remove deprecated DataUtil from iceberg-handler (Adam Szita, reviewed by Laszlo Pinter and Stamatis Zampetakis)
---
 .../java/org/apache/iceberg/mr/hive/DataUtil.java  | 211 ---------------------
 .../org/apache/iceberg/mr/hive/HiveTableUtil.java  |   5 +-
 .../alter_multi_part_table_to_iceberg.q.out        |   2 +-
 .../positive/alter_part_table_to_iceberg.q.out     |   2 +-
 .../results/positive/alter_table_to_iceberg.q.out  |   2 +-
 .../truncate_partitioned_iceberg_table.q.out       |   2 +-
 6 files changed, 7 insertions(+), 217 deletions(-)
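
The change itself is mechanical: the Hive-local DataUtil was always a stopgap copy, and HiveTableUtil now calls the equivalent utility that shipped with Iceberg, org.apache.iceberg.data.TableMigrationUtil. A minimal sketch of the call-site migration, assuming the same inputs the removed helper took (class and variable names here are illustrative, not part of the patch):

    import java.util.List;
    import java.util.Map;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.iceberg.DataFile;
    import org.apache.iceberg.MetricsConfig;
    import org.apache.iceberg.PartitionSpec;
    import org.apache.iceberg.data.TableMigrationUtil;
    import org.apache.iceberg.mapping.NameMapping;

    class ListPartitionMigration {
      static List<DataFile> list(Map<String, String> partitionKeys, String location, String format,
                                 PartitionSpec spec, Configuration conf, MetricsConfig metricsConfig,
                                 NameMapping nameMapping) {
        // Before: org.apache.iceberg.mr.hive.DataUtil.listPartition(...), removed by this patch.
        // After: the upstream utility takes the arguments in the same order,
        // per the HiveTableUtil.java hunk below.
        return TableMigrationUtil.listPartition(partitionKeys, location, format, spec, conf,
            metricsConfig, nameMapping);
      }
    }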

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/DataUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/DataUtil.java
deleted file mode 100644
index 85398d1..0000000
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/DataUtil.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.iceberg.mr.hive;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hive.iceberg.org.apache.parquet.hadoop.ParquetFileReader;
-import org.apache.hive.iceberg.org.apache.parquet.hadoop.metadata.ParquetMetadata;
-import org.apache.iceberg.DataFile;
-import org.apache.iceberg.DataFiles;
-import org.apache.iceberg.Metrics;
-import org.apache.iceberg.MetricsConfig;
-import org.apache.iceberg.PartitionField;
-import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.hadoop.HadoopInputFile;
-import org.apache.iceberg.mapping.NameMapping;
-import org.apache.iceberg.orc.OrcMetrics;
-import org.apache.iceberg.parquet.ParquetUtil;
-
-/**
- * @deprecated use org.apache.iceberg.data.DataUtil once Iceberg 0.12 is released.
- */
-@Deprecated
-public class DataUtil {
-
-  private DataUtil() {
-  }
-
-  private static final PathFilter HIDDEN_PATH_FILTER =
-      p -> !p.getName().startsWith("_") && !p.getName().startsWith(".");
-
-    /**
-     * Returns the data files in a partition by listing the partition location.
-     *
-     * For Parquet and ORC partitions, this will read metrics from the file footer. For Avro partitions,
-     * metrics are set to null.
-     * @deprecated use org.apache.iceberg.data.DataUtil#listPartition() once Iceberg 0.12 is released.
-     *
-     * @param partitionKeys partition key, e.g., "a=1/b=2"
-     * @param uri partition location URI
-     * @param format partition format, avro, parquet or orc
-     * @param spec a partition spec
-     * @param conf a Hadoop conf
-     * @param metricsConfig a metrics conf
-     * @return a List of DataFile
-     */
-  @Deprecated
-  public static List<DataFile> listPartition(Map<String, String> partitionKeys, String uri, String format,
-                                             PartitionSpec spec, Configuration conf, MetricsConfig metricsConfig) {
-    return listPartition(partitionKeys, uri, format, spec, conf, metricsConfig, null);
-  }
-
-    /**
-     * Returns the data files in a partition by listing the partition location.
-     * <p>
-     * For Parquet and ORC partitions, this will read metrics from the file footer. For Avro partitions,
-     * metrics are set to null.
-     * <p>
-     * Note: certain metrics, like NaN counts, that are only supported by iceberg file writers but not file footers,
-     * will not be populated.
-     * @deprecated use org.apache.iceberg.data.DataUtil#listPartition() once Iceberg 0.12 is released.
-     *
-     * @param partitionKeys partition key, e.g., "a=1/b=2"
-     * @param uri partition location URI
-     * @param format partition format, avro, parquet or orc
-     * @param spec a partition spec
-     * @param conf a Hadoop conf
-     * @param metricsConfig a metrics conf
-     * @param mapping a name mapping
-     * @return a List of DataFile
-     */
-  @Deprecated
-  public static List<DataFile> listPartition(Map<String, String> partitionKeys, String uri, String format,
-                                             PartitionSpec spec, Configuration conf, MetricsConfig metricsConfig,
-                                             NameMapping mapping) {
-    if (format.contains("avro")) {
-      return listAvroPartition(partitionKeys, uri, spec, conf);
-    } else if (format.contains("parquet")) {
-      return listParquetPartition(partitionKeys, uri, spec, conf, metricsConfig, mapping);
-    } else if (format.contains("orc")) {
-      return listOrcPartition(partitionKeys, uri, spec, conf, metricsConfig, mapping);
-    } else {
-      throw new UnsupportedOperationException("Unknown partition format: " + format);
-    }
-  }
-
-  private static List<DataFile> listAvroPartition(Map<String, String> partitionPath, String partitionUri,
-                                                  PartitionSpec spec, Configuration conf) {
-    try {
-      Path partition = new Path(partitionUri);
-      FileSystem fs = partition.getFileSystem(conf);
-      return Arrays.stream(fs.listStatus(partition, HIDDEN_PATH_FILTER))
-              .filter(FileStatus::isFile)
-              .map(stat -> {
-                // Avro file statistics cannot be calculated without reading the file.
-                // Setting the rowCount to 0 is just a workaround so that the DataFiles.Builder.build() doesn't fail.
-                Metrics metrics = new Metrics(0L, null, null, null);
-                String partitionKey = spec.fields().stream()
-                        .map(PartitionField::name)
-                        .map(name -> String.format("%s=%s", name, partitionPath.get(name)))
-                        .collect(Collectors.joining("/"));
-
-                return DataFiles.builder(spec)
-                        .withPath(stat.getPath().toString())
-                        .withFormat("avro")
-                        .withFileSizeInBytes(stat.getLen())
-                        .withMetrics(metrics)
-                        .withPartitionPath(partitionKey)
-                        .build();
-
-              }).collect(Collectors.toList());
-    } catch (IOException e) {
-      throw new RuntimeException("Unable to list files in partition: " + partitionUri, e);
-    }
-  }
-
-  private static List<DataFile> listParquetPartition(Map<String, String> partitionPath, String partitionUri,
-                                                     PartitionSpec spec, Configuration conf,
-                                                     MetricsConfig metricsSpec, NameMapping mapping) {
-    try {
-      Path partition = new Path(partitionUri);
-      FileSystem fs = partition.getFileSystem(conf);
-
-      return Arrays.stream(fs.listStatus(partition, HIDDEN_PATH_FILTER))
-              .filter(FileStatus::isFile)
-              .map(stat -> {
-                Metrics metrics;
-                try {
-                  ParquetMetadata metadata = ParquetFileReader.readFooter(conf, stat);
-                  metrics = ParquetUtil.footerMetrics(metadata, Stream.empty(), metricsSpec, mapping);
-                } catch (IOException e) {
-                  throw new RuntimeException("Unable to read the footer of the parquet file: " +
-                          stat.getPath(), e);
-                }
-                String partitionKey = spec.fields().stream()
-                        .map(PartitionField::name)
-                        .map(name -> String.format("%s=%s", name, partitionPath.get(name)))
-                        .collect(Collectors.joining("/"));
-
-                return DataFiles.builder(spec)
-                        .withPath(stat.getPath().toString())
-                        .withFormat("parquet")
-                        .withFileSizeInBytes(stat.getLen())
-                        .withMetrics(metrics)
-                        .withPartitionPath(partitionKey)
-                        .build();
-              }).collect(Collectors.toList());
-    } catch (IOException e) {
-      throw new RuntimeException("Unable to list files in partition: " + partitionUri, e);
-    }
-  }
-
-  private static List<DataFile> listOrcPartition(Map<String, String> partitionPath, String partitionUri,
-                                                 PartitionSpec spec, Configuration conf,
-                                                 MetricsConfig metricsSpec, NameMapping mapping) {
-    try {
-      Path partition = new Path(partitionUri);
-      FileSystem fs = partition.getFileSystem(conf);
-
-      return Arrays.stream(fs.listStatus(partition, HIDDEN_PATH_FILTER))
-              .filter(FileStatus::isFile)
-              .map(stat -> {
-                Metrics metrics = OrcMetrics.fromInputFile(HadoopInputFile.fromPath(stat.getPath(), conf),
-                        metricsSpec, mapping);
-                String partitionKey = spec.fields().stream()
-                        .map(PartitionField::name)
-                        .map(name -> String.format("%s=%s", name, partitionPath.get(name)))
-                        .collect(Collectors.joining("/"));
-
-                return DataFiles.builder(spec)
-                        .withPath(stat.getPath().toString())
-                        .withFormat("orc")
-                        .withFileSizeInBytes(stat.getLen())
-                        .withMetrics(metrics)
-                        .withPartitionPath(partitionKey)
-                        .build();
-
-              }).collect(Collectors.toList());
-    } catch (IOException e) {
-      throw new RuntimeException("Unable to list files in partition: " + partitionUri, e);
-    }
-  }
-
-
-}
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
index fccea42..a975cd6 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
@@ -45,6 +45,7 @@ import org.apache.iceberg.MetricsConfig;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.data.TableMigrationUtil;
 import org.apache.iceberg.mapping.NameMapping;
 import org.apache.iceberg.mapping.NameMappingParser;
 import org.apache.iceberg.mr.Catalogs;
@@ -128,8 +129,8 @@ public class HiveTableUtil {
       if (fileName.startsWith(".") || fileName.startsWith("_")) {
         continue;
       }
-      dataFiles.addAll(DataUtil.listPartition(partitionKeys, fileStatus.getPath().toString(), format, spec, conf,
-          metricsConfig, nameMapping));
+      dataFiles.addAll(TableMigrationUtil.listPartition(partitionKeys, fileStatus.getPath().toString(), format, spec,
+          conf, metricsConfig, nameMapping));
     }
     return dataFiles;
   }
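
Note how every q.out change below flips numRows from 0 to the actual row count, which is consistent with the Avro workaround in the deleted code above (the Metrics(0L, null, null, null) stub): listing partitions through TableMigrationUtil populates real row-count metrics where the removed helper left them at 0.
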
diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out
index 2d00101..7c71d12 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out
@@ -668,7 +668,7 @@ Table Parameters:
 #### A masked pattern was here ####
        metadata_location       hdfs://### HDFS PATH ###
        numFiles                7                   
-       numRows                 0                   
+       numRows                 15                  
        previous_metadata_location      hdfs://### HDFS PATH ###
       storage_handler         org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
        table_type              ICEBERG             
diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out
index 46e4797..6bc2192 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out
@@ -530,7 +530,7 @@ Table Parameters:
 #### A masked pattern was here ####
        metadata_location       hdfs://### HDFS PATH ###
        numFiles                4                   
-       numRows                 0                   
+       numRows                 9                   
        previous_metadata_location      hdfs://### HDFS PATH ###
       storage_handler         org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
        table_type              ICEBERG             
diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out
index d278e75..d69c983 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out
@@ -389,7 +389,7 @@ Table Parameters:
 #### A masked pattern was here ####
        metadata_location       hdfs://### HDFS PATH ###
        numFiles                1                   
-       numRows                 0                   
+       numRows                 5                   
        previous_metadata_location      hdfs://### HDFS PATH ###
        rawDataSize             0                   
       storage_handler         org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
diff --git a/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out
index 0a63478..d67d125 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out
@@ -100,7 +100,7 @@ Table Parameters:
 #### A masked pattern was here ####
        metadata_location       hdfs://### HDFS PATH ###
        numFiles                4                   
-       numRows                 0                   
+       numRows                 9                   
        previous_metadata_location      hdfs://### HDFS PATH ###
       storage_handler         org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
        table_type              ICEBERG             
