This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0-beta in repository https://gitbox.apache.org/repos/asf/doris.git
commit f3dade924d24de04fbc1b1300723018e0a2c283a Author: Yulei-Yang <[email protected]> AuthorDate: Mon Jun 5 11:01:24 2023 +0800 [fix](multicatalog) support read from hive table with HIVE_UNION_SUBDIR in path location (#20329) --- .../org/apache/doris/datasource/hive/HiveMetaStoreCache.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index 9d63797d5b..be073a0d09 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -289,7 +289,14 @@ public class HiveMetaStoreCache { result.setSplittable(HiveUtil.isSplittable(inputFormat, new Path(location), jobConf)); RemoteFileSystem fs = FileSystemFactory.getByLocation(location, jobConf); try { - RemoteFiles locatedFiles = fs.listLocatedFiles(location, true, false); + // For Tez engine, it may generate subdirectories for "union" query. + // So there may be files and directories in the table directory at the same time. e.g.: + // /user/hive/warehouse/region_tmp_union_all2/000000_0 + // /user/hive/warehouse/region_tmp_union_all2/1 + // /user/hive/warehouse/region_tmp_union_all2/2 + // So we need to recursively list data location. + // https://blog.actorsfit.com/a?ID=00550-ce56ec63-1bff-4b0c-a6f7-447b93efaa31 + RemoteFiles locatedFiles = fs.listLocatedFiles(location, true, true); locatedFiles.files().forEach(result::addFile); } catch (Exception e) { // User may manually remove partition under HDFS, in this case, --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
