Repository: carbondata Updated Branches: refs/heads/master ed225085e -> 8e54f1e45
[CARBONDATA-2807] Fixed data load performance issue in intermediate merger when the number of records is high. Problem: Data loading takes more time when the number of records is high. Root cause: As the number of records is high, the intermediate merger takes more time. Solution: Checking the number of files present in the file list was done inside a synchronized block; because of this, each intermediate merge request took some time, and when the number of records is high this impacted overall data loading performance. The size check is now performed outside the synchronized block, and the lock is taken only when enough files are present to start a merge. This closes #2588 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8e54f1e4 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8e54f1e4 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8e54f1e4 Branch: refs/heads/master Commit: 8e54f1e45c113f4cacc26af58f3492d07a312311 Parents: ed22508 Author: kumarvishal09 <kumarvishal1...@gmail.com> Authored: Mon Jul 30 21:21:09 2018 +0530 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Tue Aug 7 20:07:41 2018 +0530 ---------------------------------------------------------------------- .../unsafe/merger/UnsafeIntermediateMerger.java | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/8e54f1e4/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/merger/UnsafeIntermediateMerger.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/merger/UnsafeIntermediateMerger.java b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/merger/UnsafeIntermediateMerger.java index 0b4eae2..18f35d3 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/merger/UnsafeIntermediateMerger.java +++ 
b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/unsafe/merger/UnsafeIntermediateMerger.java @@ -37,8 +37,6 @@ import org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage; import org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException; import org.apache.carbondata.processing.sort.sortdata.SortParameters; -import org.apache.commons.collections.list.SynchronizedList; - /** * It does mergesort intermediate files to big file. */ @@ -77,8 +75,7 @@ public class UnsafeIntermediateMerger { this.mergedPages = new ArrayList<>(); this.executorService = Executors.newFixedThreadPool(parameters.getNumberOfCores(), new CarbonThreadFactory("UnsafeIntermediatePool:" + parameters.getTableName())); - this.procFiles = - SynchronizedList.decorate(new ArrayList<File>(CarbonCommonConstants.CONSTANT_SIZE_TEN)); + this.procFiles = new ArrayList<>(CarbonCommonConstants.CONSTANT_SIZE_TEN); this.mergerTask = new ArrayList<>(); Integer spillPercentage = CarbonProperties.getInstance().getSortMemorySpillPercentage(); @@ -111,18 +108,15 @@ public class UnsafeIntermediateMerger { } public void startFileMergingIfPossible() { - File[] fileList = null; - synchronized (lockObject) { - if (procFiles.size() >= parameters.getNumberOfIntermediateFileToBeMerged()) { + File[] fileList; + if (procFiles.size() >= parameters.getNumberOfIntermediateFileToBeMerged()) { + synchronized (lockObject) { fileList = procFiles.toArray(new File[procFiles.size()]); this.procFiles = new ArrayList<File>(); - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug("Submitting request for intermediate merging no of files: " + fileList.length); - } } - } - if (null != fileList) { + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("Sumitting request for intermediate merging no of files: " + fileList.length); + } startIntermediateMerging(fileList); } }