iverase commented on a change in pull request #569: LUCENE-8687: Optimise radix partitioning for points on heap URL: https://github.com/apache/lucene-solr/pull/569#discussion_r255046022
########## File path: lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java ########## @@ -196,33 +208,45 @@ private int findCommonPrefix(OfflinePointWriter points, long from, long to, int //special case when be have lot of points that are equal if (commonPrefix == bytesSorted - 1) { long tieBreakCount =(partitionPoint - from - leftCount); - partition(points, left, right, null, from, to, dim, commonPrefix, tieBreakCount); + offlinePartition(points, left, right, null, from, to, dim, commonPrefix, tieBreakCount); return partitionPointFromCommonPrefix(); } //create the delta points writer PointWriter deltaPoints; - if (delta <= maxPointsSortInHeap) { + if (delta <= getMaxPointsSortInHeap(left, right)) { deltaPoints = new HeapPointWriter(Math.toIntExact(delta), Math.toIntExact(delta), packedBytesLength); } else { deltaPoints = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, "delta" + iteration, delta); } //divide the points. This actually destroys the current writer - partition(points, left, right, deltaPoints, from, to, dim, commonPrefix, 0); + offlinePartition(points, left, right, deltaPoints, from, to, dim, commonPrefix, 0); //close delta point writer deltaPoints.close(); long newPartitionPoint = partitionPoint - from - leftCount; if (deltaPoints instanceof HeapPointWriter) { - return heapSelect((HeapPointWriter) deltaPoints, left, right, dim, 0, (int) deltaPoints.count(), Math.toIntExact(newPartitionPoint), ++commonPrefix); + return heapPartition((HeapPointWriter) deltaPoints, left, right, dim, 0, (int) deltaPoints.count(), Math.toIntExact(newPartitionPoint), ++commonPrefix); } else { return buildHistogramAndPartition((OfflinePointWriter) deltaPoints, left, right, 0, deltaPoints.count(), newPartitionPoint, ++iteration, ++commonPrefix, dim); } } - private void partition(OfflinePointWriter points, PointWriter left, PointWriter right, PointWriter deltaPoints, - long from, long to, int dim, int bytePosition, long numDocsTiebreak) 
throws IOException { + private int getMaxPointsSortInHeap(PointWriter left, PointWriter right) { + long pointsUsed = 0; + if (left instanceof HeapPointWriter) { + pointsUsed += left.count(); + } + if (right instanceof HeapPointWriter) { + pointsUsed += right.count(); + } + assert maxPointsSortInHeap >= pointsUsed; + return maxPointsSortInHeap - (int) pointsUsed; Review comment: I have changed the logic a bit: I am now using the `maxSize` of the `HeapPointWriter` to calculate the offset at which to move the selection into the heap. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org