Taewoo Kim has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/1144
Change subject: ASTERIXDB-1628: Fixed an issue in External Hash Group by ...................................................................... ASTERIXDB-1628: Fixed an issue in External Hash Group by - The number of partitions in External Hash Group By is now properly calculated by considering a corner case. Change-Id: I8901d2b64659fb0d2b97d73f45a9fe113232e860 --- M hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java 1 file changed, 11 insertions(+), 7 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/44/1144/1 diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java index f08d27d..85a7609 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java @@ -238,17 +238,21 @@ }; } - private int getNumOfPartitions(int nubmerOfFramesForData, int frameLimit) { - if (frameLimit > nubmerOfFramesForData) { + private int getNumOfPartitions(int nubmerOfFramesForDataAndHashTable, int frameLimit) { + if (frameLimit >= nubmerOfFramesForDataAndHashTable * FUDGE_FACTOR) { return 1; // all in memory, we will create a big partition } + // The formula is based on Shapiro's paper - http://cs.stanford.edu/people/chrismre/cs345/rl/shapiro.pdf. + // Check the page 249 for more details. int numberOfPartitions = (int) (Math - .ceil((nubmerOfFramesForData * FUDGE_FACTOR - frameLimit) / (frameLimit - 1))); - if (numberOfPartitions <= 0) { - numberOfPartitions = 1; //becomes in-memory hash - } + .ceil((nubmerOfFramesForDataAndHashTable * FUDGE_FACTOR - frameLimit) / (frameLimit - 1))); + // Actually, at this stage, we know that this is not a in-memory hash (#frames required > #frameLimit). + // So we want to guarantee that the number of partition is at least two because there is a corner case. + numberOfPartitions = Math.max(2, numberOfPartitions); + // If the number of partitions is greater than the memory budget, there might be a case that we can't + // allocate at least one frame for each partition in memory. So, we deal with those cases here. if (numberOfPartitions > frameLimit) { - numberOfPartitions = (int) Math.ceil(Math.sqrt(nubmerOfFramesForData * FUDGE_FACTOR)); + numberOfPartitions = (int) Math.ceil(Math.sqrt(nubmerOfFramesForDataAndHashTable * FUDGE_FACTOR)); return Math.max(2, Math.min(numberOfPartitions, frameLimit)); } return numberOfPartitions; -- To view, visit https://asterix-gerrit.ics.uci.edu/1144 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I8901d2b64659fb0d2b97d73f45a9fe113232e860 Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Taewoo Kim <wangs...@yahoo.com>