[hive] branch master updated: HIVE-25727: Iceberg hive catalog should create table object with initialised SerdeParams (Marton Bod, reviewed by Peter Vary)
This is an automated email from the ASF dual-hosted git repository.

mbod pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new e98940f  HIVE-25727: Iceberg hive catalog should create table object with initialised SerdeParams (Marton Bod, reviewed by Peter Vary)
e98940f is described below

commit e98940feb689e7c5c7ebd08a53ad7a5749a9e049
Author: Marton Bod
AuthorDate: Fri Nov 19 23:41:55 2021 +0100

    HIVE-25727: Iceberg hive catalog should create table object with initialised SerdeParams (Marton Bod, reviewed by Peter Vary)
---
 .../src/main/java/org/apache/iceberg/hive/HiveTableOperations.java | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
index 0ade33f..ccde59c 100644
--- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
+++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
@@ -70,6 +70,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.BiMap;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableBiMap;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.util.Tasks;
 import org.apache.thrift.TException;
 import org.slf4j.Logger;
@@ -403,6 +404,7 @@ public class HiveTableOperations extends BaseMetastoreTableOperations {
     storageDescriptor.setCols(HiveSchemaUtil.convert(metadata.schema()));
     storageDescriptor.setLocation(metadata.location());
     SerDeInfo serDeInfo = new SerDeInfo();
+    serDeInfo.setParameters(Maps.newHashMap());
     if (hiveEngineEnabled) {
       storageDescriptor.setInputFormat("org.apache.iceberg.mr.hive.HiveIcebergInputFormat");
       storageDescriptor.setOutputFormat("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat");
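The fix itself is a one-liner: the Hive catalog now creates the SerDeInfo with an initialised (empty) parameters map instead of leaving it null, so code that later reads or adds SerDe properties no longer risks a NullPointerException. A minimal sketch of the before/after behaviour, using plain java.util.HashMap in place of the relocated Guava Maps.newHashMap() the patch uses:

    import java.util.HashMap;
    import org.apache.hadoop.hive.metastore.api.SerDeInfo;

    public class SerdeParamsExample {
      public static void main(String[] args) {
        // Before the fix: a fresh SerDeInfo has a null parameters map.
        SerDeInfo bare = new SerDeInfo();
        // bare.getParameters().put("k", "v");  // would throw NullPointerException

        // After the fix: the catalog initialises the map up front,
        // so callers can safely read and add SerDe properties.
        SerDeInfo initialised = new SerDeInfo();
        initialised.setParameters(new HashMap<>());
        initialised.getParameters().put("serialization.format", "1");
        System.out.println(initialised.getParameters());
      }
    }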
[hive] branch master updated: HIVE-25658: Addendum: Fix regex for masking totalSize table properties in Iceberg q-tests (Marton Bod, reviewed by Peter Vary)
This is an automated email from the ASF dual-hosted git repository.

mbod pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new a550003  HIVE-25658: Addendum: Fix regex for masking totalSize table properties in Iceberg q-tests (Marton Bod, reviewed by Peter Vary)
a550003 is described below

commit a5500035c12d94de9094d880822776d276817439
Author: Marton Bod
AuthorDate: Fri Nov 19 23:41:17 2021 +0100

    HIVE-25658: Addendum: Fix regex for masking totalSize table properties in Iceberg q-tests (Marton Bod, reviewed by Peter Vary)
---
 .../test/queries/positive/create_iceberg_table.q    | 3 +++
 .../create_iceberg_table_stored_as_fileformat.q     | 3 +++
 .../create_iceberg_table_stored_by_iceberg.q        | 3 +++
 ...g_table_stored_by_iceberg_with_serdeproperties.q | 3 +++
 .../positive/describe_iceberg_metadata_tables.q     | 3 +++
 .../test/queries/positive/describe_iceberg_table.q  | 3 +++
 .../query_iceberg_metadata_of_partitioned_table.q   | 4
 .../query_iceberg_metadata_of_unpartitioned_table.q | 4
 .../positive/query_iceberg_metadata_with_joins.q    | 3 +++
 .../queries/positive/truncate_force_iceberg_table.q | 4 ++--
 .../test/queries/positive/truncate_iceberg_table.q  | 4 ++--
 .../results/positive/create_iceberg_table.q.out     | 2 +-
 .../create_iceberg_table_stored_as_fileformat.q.out | 10 +-
 .../create_iceberg_table_stored_by_iceberg.q.out    | 2 +-
 ...ble_stored_by_iceberg_with_serdeproperties.q.out | 2 +-
 .../results/positive/describe_iceberg_table.q.out   | 8
 ...ry_iceberg_metadata_of_unpartitioned_table.q.out | Bin 29346 -> 29566 bytes
 .../positive/truncate_force_iceberg_table.q.out     | 2 +-
 .../results/positive/truncate_iceberg_table.q.out   | 4 ++--
 19 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table.q b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table.q
index a86d642..880b92b 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table.q
@@ -1,3 +1,6 @@
+-- Mask the totalSize value as it can have slight variability, causing test flakiness
+--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
+
 set hive.vectorized.execution.enabled=false;
 CREATE EXTERNAL TABLE ice_t (i int, s string, ts timestamp, d date) STORED BY ICEBERG;
 DESCRIBE FORMATTED ice_t;
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_as_fileformat.q b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_as_fileformat.q
index bc6adb8..ab1a309 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_as_fileformat.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_as_fileformat.q
@@ -1,3 +1,6 @@
+-- Mask the totalSize value as it can have slight variability, causing test flakiness
+--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
+
 set hive.vectorized.execution.enabled=false;
 DROP TABLE IF EXISTS ice_orc;
 CREATE EXTERNAL TABLE ice_orc (i int, s string, ts timestamp, d date) STORED BY ICEBERG STORED AS ORC;
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg.q
index f932901..7b6f12a 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg.q
@@ -1,3 +1,6 @@
+-- Mask the totalSize value as it can have slight variability, causing test flakiness
+--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
+
 set hive.vectorized.execution.enabled=false;
 CREATE EXTERNAL TABLE ice_t (i int, s string, ts timestamp, d date) STORED BY ICEBERG;
 DESCRIBE FORMATTED ice_t;
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q
index 7c44eb6..47c1f52 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q
@@ -1,3 +1,6 @@
+-- Mask the totalSize value as it can have slight variability, causing test flakiness
+--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
+
 set hive.vectorized.execution.enabled=false;
 CREATE EXTERNAL TABLE ice_t (i int, s string, ts timestamp, d date) STORED BY ICEBERG WITH
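The masking directive added at the top of each q-file is a plain regex substitution that the qtest driver applies to the output before comparing it against the .q.out baseline. A quick standalone way to sanity-check the pattern outside the qtest framework (a hypothetical snippet, not part of the commit):

    public class TotalSizeMaskCheck {
      public static void main(String[] args) {
        // Same pattern as the qt:replace directive: keep the label and the
        // surrounding whitespace, mask only the variable size value.
        String line = "\ttotalSize          \t1234567             \n";
        String masked = line.replaceAll("(\\s+totalSize\\s+)\\S+(\\s+)", "$1#Masked#$2");
        System.out.println(masked);  // the 1234567 value is now #Masked#
      }
    }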
[hive] branch master updated: HIVE-25700: Prevent deletion of Notification Events post restarts. (Ayush Saxena, reviewed by Pravin Kumar Sinha)
This is an automated email from the ASF dual-hosted git repository.

pravin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new e8ba31a  HIVE-25700: Prevent deletion of Notification Events post restarts. (Ayush Saxena, reviewed by Pravin Kumar Sinha)
e8ba31a is described below

commit e8ba31a7dcc35cc5744423614ed7474876dc563a
Author: Ayush Saxena
AuthorDate: Fri Nov 19 23:45:03 2021 +0530

    HIVE-25700: Prevent deletion of Notification Events post restarts. (Ayush Saxena, reviewed by Pravin Kumar Sinha)
---
 .../hcatalog/listener/DbNotificationListener.java | 34 ++
 .../ql/parse/TestReplWithJsonMessageFormat.java   |  1 +
 .../hive/ql/parse/TestReplicationScenarios.java   | 72 --
 .../hadoop/hive/metastore/conf/MetastoreConf.java |  3 +
 4 files changed, 106 insertions(+), 4 deletions(-)

diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java
index 4f442ce..7980d53 100644
--- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java
+++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java
@@ -142,6 +142,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.google.common.collect.Lists;
 import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME;
+import static org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars.EVENT_DB_LISTENER_CLEAN_STARTUP_WAIT_INTERVAL;

 /**
  * An implementation of {@link org.apache.hadoop.hive.metastore.MetaStoreEventListener} that
@@ -254,6 +255,13 @@ public class DbNotificationListener extends TransactionalMetaStoreEventListener
       cleaner.setCleanupInterval(MetastoreConf.getTimeVar(getConf(),
           MetastoreConf.ConfVars.EVENT_DB_LISTENER_CLEAN_INTERVAL, TimeUnit.MILLISECONDS));
     }
+
+    if (key.equals(EVENT_DB_LISTENER_CLEAN_STARTUP_WAIT_INTERVAL.toString()) || key
+        .equals(EVENT_DB_LISTENER_CLEAN_STARTUP_WAIT_INTERVAL.getHiveName())) {
+      cleaner.setWaitInterval(MetastoreConf
+          .getTimeVar(getConf(), EVENT_DB_LISTENER_CLEAN_STARTUP_WAIT_INTERVAL,
+              TimeUnit.MILLISECONDS));
+    }
   }

 /**
@@ -1406,6 +1414,8 @@ public class DbNotificationListener extends TransactionalMetaStoreEventListener
     private final RawStore rs;
     private int ttl;
     private long sleepTime;
+    private long waitInterval;
+    private boolean isInTest;

     CleanerThread(Configuration conf, RawStore rs) {
       super("DB-Notification-Cleaner");
@@ -1413,14 +1423,34 @@ public class DbNotificationListener extends TransactionalMetaStoreEventListener
       this.rs = Objects.requireNonNull(rs);
       boolean isReplEnabled = MetastoreConf.getBoolVar(conf, ConfVars.REPLCMENABLED);
+      isInTest = conf.getBoolean(HiveConf.ConfVars.HIVE_IN_TEST_REPL.varname, false);
       ConfVars ttlConf = (isReplEnabled) ? ConfVars.REPL_EVENT_DB_LISTENER_TTL : ConfVars.EVENT_DB_LISTENER_TTL;
       setTimeToLive(MetastoreConf.getTimeVar(conf, ttlConf, TimeUnit.SECONDS));
       setCleanupInterval(
           MetastoreConf.getTimeVar(conf, ConfVars.EVENT_DB_LISTENER_CLEAN_INTERVAL, TimeUnit.MILLISECONDS));
+      setWaitInterval(MetastoreConf
+          .getTimeVar(conf, EVENT_DB_LISTENER_CLEAN_STARTUP_WAIT_INTERVAL, TimeUnit.MILLISECONDS));
     }

     @Override
     public void run() {
+      LOG.info("Wait interval is {}", waitInterval);
+      if (waitInterval > 0) {
+        try {
+          LOG.info("Cleaner Thread Restarted and {} or {} is configured. So cleaner thread will startup post waiting " +
+              "{} ms", EVENT_DB_LISTENER_CLEAN_STARTUP_WAIT_INTERVAL,
+              EVENT_DB_LISTENER_CLEAN_STARTUP_WAIT_INTERVAL.getHiveName(), waitInterval);
+          Thread.sleep(waitInterval);
+        } catch (InterruptedException e) {
+          LOG.error("Failed during the initial wait before start.", e);
+          if (isInTest) {
+            Thread.currentThread().interrupt();
+          }
+          return;
+        }
+        LOG.info("Completed Cleaner thread initial wait. Starting normal processing.");
+      }
+
       while (true) {
         LOG.debug("Cleaner thread running");
         try {
@@ -1448,5 +1478,9 @@ public class DbNotificationListener extends TransactionalMetaStoreEventListener
     public void setCleanupInterval(long configInterval) {
       sleepTime = configInterval;
     }
+
+    public void setWaitInterval(long waitInterval) {
+      this.waitInterval = waitInterval;
+    }
   }
 }
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplWithJsonMessageFormat.java
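Behind the diff the idea is simple: after a metastore restart, the cleaner thread now waits a configurable interval before it starts purging notification events, giving replication consumers a chance to read events that would otherwise be deleted immediately. A stripped-down sketch of that pattern (purgeExpiredEvents() is a placeholder, not a Hive API; the real code reads the wait from MetastoreConf):

    // Minimal sketch of a notification-event cleaner with a configurable
    // startup delay, mirroring DbNotificationListener.CleanerThread.
    public class CleanerThreadSketch extends Thread {
      private final long waitIntervalMs;  // startup wait, from config
      private final long sleepTimeMs;     // pause between cleanup rounds

      CleanerThreadSketch(long waitIntervalMs, long sleepTimeMs) {
        super("DB-Notification-Cleaner");
        this.waitIntervalMs = waitIntervalMs;
        this.sleepTimeMs = sleepTimeMs;
        setDaemon(true);
      }

      @Override
      public void run() {
        if (waitIntervalMs > 0) {
          try {
            // Give downstream consumers time to catch up after a restart
            // before any events become eligible for deletion.
            Thread.sleep(waitIntervalMs);
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return;
          }
        }
        while (!isInterrupted()) {
          purgeExpiredEvents();
          try {
            Thread.sleep(sleepTimeMs);
          } catch (InterruptedException e) {
            interrupt();
            return;
          }
        }
      }

      private void purgeExpiredEvents() {
        System.out.println("cleaning events older than TTL");  // stand-in for RawStore cleanup
      }

      public static void main(String[] args) throws InterruptedException {
        new CleanerThreadSketch(2000, 1000).start();
        Thread.sleep(5000);  // let it run a few rounds, then exit
      }
    }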
[hive] branch master updated: HIVE-25714: docker logs commands timeout regularily during testing (#2801) (Zoltan Haindrich reviewed by Stamatis Zampetakis)
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new 79098a7  HIVE-25714: docker logs commands timeout regularily during testing (#2801) (Zoltan Haindrich reviewed by Stamatis Zampetakis)
79098a7 is described below

commit 79098a75baeffc683f73afb4788d4cb2b2484ad6
Author: Zoltan Haindrich
AuthorDate: Fri Nov 19 11:54:55 2021 +0100

    HIVE-25714: docker logs commands timeout regularily during testing (#2801) (Zoltan Haindrich reviewed by Stamatis Zampetakis)
---
 .../apache/hadoop/hive/ql/externalDB/AbstractExternalDB.java | 11 ++-
 .../hadoop/hive/metastore/dbinstall/rules/DatabaseRule.java  |  3 ++-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/externalDB/AbstractExternalDB.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/externalDB/AbstractExternalDB.java
index ba0c7ad..f328bfc 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/externalDB/AbstractExternalDB.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/externalDB/AbstractExternalDB.java
@@ -36,7 +36,7 @@ import java.util.concurrent.TimeUnit;

 /**
  * The class is in charge of connecting and populating dockerized databases for qtest.
- * 
+ *
  * The database should have at least one root user (admin/superuser) able to modify every aspect of the system. The user
  * either exists by default when the database starts or must created right after startup.
  */
@@ -99,6 +99,7 @@ public abstract class AbstractExternalDB {
     reader = new BufferedReader(new InputStreamReader(proc.getErrorStream()));
     final StringBuilder errLines = new StringBuilder();
     reader.lines().forEach(s -> errLines.append(s).append('\n'));
+    LOG.info("Result size: " + lines.length() + ";" + errLines.length());
     return new ProcessResults(lines.toString(), errLines.toString(), proc.exitValue());
   }

@@ -120,7 +121,7 @@ public abstract class AbstractExternalDB {
     ProcessResults pr;
     do {
       Thread.sleep(1000);
-      pr = runCmd(buildLogCmd(), 5);
+      pr = runCmd(buildLogCmd(), 30);
       if (pr.rc != 0) {
         throw new RuntimeException("Failed to get docker logs");
       }
@@ -149,7 +150,7 @@ public abstract class AbstractExternalDB {

   /**
    * Return the name of the root user.
-   * 
+   *
    * Override the method if the name of the root user must be different than the default.
    */
   protected String getRootUser() {
@@ -158,13 +159,13 @@ public abstract class AbstractExternalDB {

   /**
    * Return the password of the root user.
-   * 
+   *
    * Override the method if the password must be different than the default.
    */
   protected String getRootPassword() {
     return "qtestpassword";
   }
-  
+
   protected abstract String getJdbcUrl();

   protected abstract String getJdbcDriver();
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/DatabaseRule.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/DatabaseRule.java
index f4e4388..4fc8d50 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/DatabaseRule.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/DatabaseRule.java
@@ -132,7 +132,7 @@ public abstract class DatabaseRule extends ExternalResource {
     ProcessResults pr;
     do {
       Thread.sleep(1000);
-      pr = runCmd(buildLogCmd(), 5);
+      pr = runCmd(buildLogCmd(), 30);
       if (pr.rc != 0) {
         throw new RuntimeException("Failed to get docker logs");
       }
@@ -185,6 +185,7 @@ public abstract class DatabaseRule extends ExternalResource {
     reader = new BufferedReader(new InputStreamReader(proc.getErrorStream()));
     final StringBuilder errLines = new StringBuilder();
     reader.lines().forEach(s -> errLines.append(s).append('\n'));
+    LOG.info("Result size: " + lines.length() + ";" + errLines.length());
     return new ProcessResults(lines.toString(), errLines.toString(), proc.exitValue());
   }
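The substance of the fix is a single constant: the timeout for the polled `docker logs` command goes from 5 to 30 seconds (plus an extra log line recording output sizes), so slow daemon responses under CI load no longer fail the test spuriously. A minimal sketch of running an external command with a bounded wait, assuming a hypothetical helper in the spirit of the test utilities touched here:

    import java.util.concurrent.TimeUnit;

    public class BoundedCmd {
      /** Runs a command, failing if it does not finish within timeoutSeconds. */
      static int runCmd(String[] cmd, long timeoutSeconds) throws Exception {
        Process proc = Runtime.getRuntime().exec(cmd);
        // The old 5-second bound was too tight for a busy docker daemon;
        // 30 seconds gives it room to respond without hanging the test forever.
        if (!proc.waitFor(timeoutSeconds, TimeUnit.SECONDS)) {
          proc.destroyForcibly();
          throw new RuntimeException("Timed out waiting for: " + String.join(" ", cmd));
        }
        return proc.exitValue();
      }

      public static void main(String[] args) throws Exception {
        int rc = runCmd(new String[] {"docker", "logs", "qtestDB"}, 30);
        System.out.println("exit code: " + rc);
      }
    }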
[hive] branch master updated: HIVE-25509: CLIService.closeOperation should not fail if operation handle is not present (#2627) (Laszlo Bodor reviewed by Mahesh Kumar Behera)
This is an automated email from the ASF dual-hosted git repository.

abstractdog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new b76c640  HIVE-25509: CLIService.closeOperation should not fail if operation handle is not present (#2627) (Laszlo Bodor reviewed by Mahesh Kumar Behera)
b76c640 is described below

commit b76c64024ac340c7a1d764f68bac15501b2bc8d6
Author: Bodor Laszlo
AuthorDate: Fri Nov 19 10:44:09 2021 +0100

    HIVE-25509: CLIService.closeOperation should not fail if operation handle is not present (#2627) (Laszlo Bodor reviewed by Mahesh Kumar Behera)
---
 .../java/org/apache/hive/jdbc/HiveStatement.java | 40 +-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java b/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java
index 609b3d7..ce8197b 100644
--- a/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java
@@ -195,7 +195,9 @@ public class HiveStatement implements java.sql.Statement {
       if (stmtHandle.isPresent()) {
         TCloseOperationReq closeReq = new TCloseOperationReq(stmtHandle.get());
         TCloseOperationResp closeResp = client.CloseOperation(closeReq);
-        Utils.verifySuccessWithInfo(closeResp.getStatus());
+        if (!checkInvalidOperationHandle(closeResp)) {
+          Utils.verifySuccessWithInfo(closeResp.getStatus());
+        }
       }
     } catch (SQLException e) {
       throw e;
@@ -214,6 +216,42 @@ public class HiveStatement implements java.sql.Statement {
     }
   }

+  /**
+   * Invalid OperationHandle is a special case in HS2, which sometimes could be considered as safe to ignore.
+   * For instance: if the client retried due to HIVE-24786, and the retried operation happened to be the
+   * closeOperation, we don't care as the query might have already been removed from HS2's scope.
+   * @return true, if the response from server contains "Invalid OperationHandle"
+   */
+  private boolean checkInvalidOperationHandle(TCloseOperationResp closeResp) {
+    List<String> messages = closeResp.getStatus().getInfoMessages();
+    if (messages != null && messages.size() > 0) {
+      /*
+       * Here we need to handle 2 different cases, which can happen in CLIService.closeOperation, which actually does:
+       * sessionManager.getOperationManager().getOperation(opHandle).getParentSession().closeOperation(opHandle);
+       */
+      String message = messages.get(0);
+      if (message.contains("Invalid OperationHandle")) {
+        /*
+         * This happens when the first request properly removes the operation handle, then second request arrives, calls
+         * sessionManager.getOperationManager().getOperation(opHandle), and it doesn't find the handle.
+         */
+        LOG.warn("'Invalid OperationHandle' on server side (messages: " + messages + ")");
+        return true;
+      } else if (message.contains("Operation does not exist")) {
+        /*
+         * This is an extremely rare case, which represents a race condition when the first and second request
+         * arrives almost at the same time, both can get the OperationHandle instance
+         * from sessionManager's OperationManager, but the second fails, because it cannot get it again from the
+         * session's OperationManager, because it has been already removed in the meantime.
+         */
+        LOG.warn("'Operation does not exist' on server side (messages: " + messages + ")");
+        return true;
+      }
+    }
+
+    return false;
+  }
+
   void closeClientOperation() throws SQLException {
     try {
       closeStatementIfNeeded();
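The heart of the change is a tolerant status check: instead of failing the close, the driver scans the info messages on the close response and downgrades two known benign server-side messages to warnings. A simplified, standalone sketch of that matching logic (the real code works on the thrift TCloseOperationResp; here the messages are passed in directly):

    import java.util.List;

    public class BenignCloseCheck {
      // Messages HS2 returns when the operation handle is already gone;
      // per HIVE-25509 these are safe to ignore on a (retried) close.
      private static final String[] BENIGN = {
          "Invalid OperationHandle", "Operation does not exist"};

      static boolean isBenignCloseFailure(List<String> infoMessages) {
        if (infoMessages == null || infoMessages.isEmpty()) {
          return false;
        }
        String first = infoMessages.get(0);
        for (String marker : BENIGN) {
          if (first.contains(marker)) {
            return true;  // real code logs a warning here instead of failing
          }
        }
        return false;
      }

      public static void main(String[] args) {
        System.out.println(isBenignCloseFailure(
            List.of("Invalid OperationHandle: the query was already closed")));  // true
        System.out.println(isBenignCloseFailure(List.of("Some other error")));   // false
      }
    }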
[hive] branch master updated: HIVE-25689: Remove deprecated DataUtil from iceberg-handler (Adam Szita, reviewed by Laszlo Pinter and Stamatis Zampetakis)
This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new 5ad2c80  HIVE-25689: Remove deprecated DataUtil from iceberg-handler (Adam Szita, reviewed by Laszlo Pinter and Stamatis Zampetakis)
5ad2c80 is described below

commit 5ad2c8042ddc9fc1719c641e55a9f3d2faa9a6c4
Author: Adam Szita <40628386+sz...@users.noreply.github.com>
AuthorDate: Fri Nov 19 09:25:44 2021 +0100

    HIVE-25689: Remove deprecated DataUtil from iceberg-handler (Adam Szita, reviewed by Laszlo Pinter and Stamatis Zampetakis)
---
 .../java/org/apache/iceberg/mr/hive/DataUtil.java  | 211 -
 .../org/apache/iceberg/mr/hive/HiveTableUtil.java  |   5 +-
 .../alter_multi_part_table_to_iceberg.q.out        |   2 +-
 .../positive/alter_part_table_to_iceberg.q.out     |   2 +-
 .../results/positive/alter_table_to_iceberg.q.out  |   2 +-
 .../truncate_partitioned_iceberg_table.q.out       |   2 +-
 6 files changed, 7 insertions(+), 217 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/DataUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/DataUtil.java
deleted file mode 100644
index 85398d1..000
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/DataUtil.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.iceberg.mr.hive;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hive.iceberg.org.apache.parquet.hadoop.ParquetFileReader;
-import org.apache.hive.iceberg.org.apache.parquet.hadoop.metadata.ParquetMetadata;
-import org.apache.iceberg.DataFile;
-import org.apache.iceberg.DataFiles;
-import org.apache.iceberg.Metrics;
-import org.apache.iceberg.MetricsConfig;
-import org.apache.iceberg.PartitionField;
-import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.hadoop.HadoopInputFile;
-import org.apache.iceberg.mapping.NameMapping;
-import org.apache.iceberg.orc.OrcMetrics;
-import org.apache.iceberg.parquet.ParquetUtil;
-
-/**
- * @deprecated use org.apache.iceberg.data.DataUtil once Iceberg 0.12 is released.
- */
-@Deprecated
-public class DataUtil {
-
-  private DataUtil() {
-  }
-
-  private static final PathFilter HIDDEN_PATH_FILTER =
-      p -> !p.getName().startsWith("_") && !p.getName().startsWith(".");
-
-  /**
-   * Returns the data files in a partition by listing the partition location.
-   *
-   * For Parquet and ORC partitions, this will read metrics from the file footer. For Avro partitions,
-   * metrics are set to null.
-   * @deprecated use org.apache.iceberg.data.DataUtil#listPartition() once Iceberg 0.12 is released.
-   *
-   * @param partitionKeys partition key, e.g., "a=1/b=2"
-   * @param uri partition location URI
-   * @param format partition format, avro, parquet or orc
-   * @param spec a partition spec
-   * @param conf a Hadoop conf
-   * @param metricsConfig a metrics conf
-   * @return a List of DataFile
-   */
-  @Deprecated
-  public static List<DataFile> listPartition(Map<String, String> partitionKeys, String uri, String format,
-      PartitionSpec spec, Configuration conf, MetricsConfig metricsConfig) {
-    return listPartition(partitionKeys, uri, format, spec, conf, metricsConfig, null);
-  }
-
-  /**
-   * Returns the data files in a partition by listing the partition location.
-   *
-   * For Parquet and ORC partitions, this will read metrics from the file footer. For Avro partitions,
-   * metrics are set to null.
-   *
-   * Note: certain metrics, like NaN counts, that are only supported by iceberg file writers but not file footers,
-   * will not be populated.
-