[HOTFIX] Removed file existence check to improve dataMap loading performance
Problem: DataMap loading performance degraded after a file existence check was added. Analysis: When a carbonIndex file is read and the map from carbondata file path to its metadata info is prepared, the physical existence of each file is checked every time, which in the case of the HDFS file system is a namenode call. This degrades the dataMap loading performance. The check was added to avoid failures in the IUD scenario, where after a delete operation the carbondata file is deleted but its entry still exists in the index file. Fix: Modified the code to check for physical file existence only when an IUD operation has happened on the table. This closes #2560 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fd747a3e Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fd747a3e Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fd747a3e Branch: refs/heads/external-format Commit: fd747a3eacee7196bc290e3ee17333627bf485a9 Parents: 3b9efed Author: manishgupta88 <tomanishgupt...@gmail.com> Authored: Fri Jul 27 14:11:11 2018 +0530 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Mon Jul 30 13:03:35 2018 +0530 ---------------------------------------------------------------------- .../carbondata/core/util/BlockletDataMapUtil.java | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/fd747a3e/core/src/main/java/org/apache/carbondata/core/util/BlockletDataMapUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/BlockletDataMapUtil.java b/core/src/main/java/org/apache/carbondata/core/util/BlockletDataMapUtil.java index 86e9f9c..db41e73 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/BlockletDataMapUtil.java +++ 
b/core/src/main/java/org/apache/carbondata/core/util/BlockletDataMapUtil.java @@ -115,8 +115,14 @@ public class BlockletDataMapUtil { CarbonTable.updateTableByTableInfo(carbonTable, carbonTable.getTableInfo()); } String blockPath = footer.getBlockInfo().getTableBlockInfo().getFilePath(); - if (null == blockMetaInfoMap.get(blockPath) && FileFactory.isFileExist(blockPath)) { - blockMetaInfoMap.put(blockPath, createBlockMetaInfo(fileNameToMetaInfoMapping, blockPath)); + if (null == blockMetaInfoMap.get(blockPath)) { + BlockMetaInfo blockMetaInfo = createBlockMetaInfo(fileNameToMetaInfoMapping, blockPath); + // if blockMetaInfo is null that means the file has been deleted from the file system. + // This can happen in case IUD scenarios where after deleting or updating the data the + // complete block is deleted but the entry still exists in index or merge index file + if (null != blockMetaInfo) { + blockMetaInfoMap.put(blockPath, blockMetaInfo); + } } } return blockMetaInfoMap; @@ -152,10 +158,14 @@ public class BlockletDataMapUtil { } private static BlockMetaInfo createBlockMetaInfo( - Map<String, BlockMetaInfo> fileNameToMetaInfoMapping, String carbonDataFile) { + Map<String, BlockMetaInfo> fileNameToMetaInfoMapping, String carbonDataFile) + throws IOException { FileFactory.FileType fileType = FileFactory.getFileType(carbonDataFile); switch (fileType) { case LOCAL: + if (!FileFactory.isFileExist(carbonDataFile)) { + return null; + } CarbonFile carbonFile = FileFactory.getCarbonFile(carbonDataFile, fileType); return new BlockMetaInfo(new String[] { "localhost" }, carbonFile.getSize()); default: