Github user kumarvishal09 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/952#discussion_r118667394 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -474,80 +495,142 @@ private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnD int numerOfRows) { BitSet bitSet = new BitSet(numerOfRows); // if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) { - int start = 0; - int startMin = 0; - int endMax = 0; - int startIndex = 0; byte[][] filterValues = this.filterRangesValues; - // For Range expression we expect two values. The First is the Min Value and Second is the - // Max value. - if (startBlockMinIsDefaultStart == false) { - - start = CarbonUtil - .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, - startIndex, numerOfRows - 1, filterValues[0], greaterThanExp); + if (dimensionColumnDataChunk.isExplicitSorted()) { + int start = 0; + int startMin = 0; + int endMax = 0; + int startIndex = 0; + // For Range expression we expect two values. The First is the Min Value and Second is the + // Max value. + if (startBlockMinIsDefaultStart == false) { - if (greaterThanExp == true && start >= 0) { start = CarbonUtil - .nextGreaterValueToTarget(start, dimensionColumnDataChunk, filterValues[0], - numerOfRows); - } + .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1, + filterValues[0], greaterThanExp); - if (start < 0) { - start = -(start + 1); - if (start == numerOfRows) { - start = start - 1; + if (greaterThanExp == true && start >= 0) { + start = CarbonUtil + .nextGreaterValueToTarget(start, dimensionColumnDataChunk, filterValues[0], + numerOfRows); } - // Method will compare the tentative index value after binary search, this tentative - // index needs to be compared by the filter member if its >= filter then from that - // index the bitset will be considered for filtering process. 
- if ((ByteUtil.compare(filterValues[0], dimensionColumnDataChunk.getChunkData(start))) - > 0) { - start = start + 1; + + if (start < 0) { + start = -(start + 1); + if (start == numerOfRows) { + start = start - 1; + } + // Method will compare the tentative index value after binary search, this tentative + // index needs to be compared by the filter member if its >= filter then from that + // index the bitset will be considered for filtering process. + if ((ByteUtil.compare(filterValues[0], dimensionColumnDataChunk.getChunkData(start))) + > 0) { + start = start + 1; + } } + startMin = start; + } else { + startMin = startIndex; } - startMin = start; - } else { - startMin = startIndex; - } - - if (endBlockMaxisDefaultEnd == false) { - start = CarbonUtil - .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1, - filterValues[1], lessThanEqualExp); - if (lessThanExp == true && start >= 0) { - start = - CarbonUtil.nextLesserValueToTarget(start, dimensionColumnDataChunk, filterValues[1]); - } + if (endBlockMaxisDefaultEnd == false) { + start = CarbonUtil + .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1, + filterValues[1], lessThanEqualExp); - if (start < 0) { - start = -(start + 1); - if (start == numerOfRows) { - start = start - 1; + if (lessThanExp == true && start >= 0) { + start = + CarbonUtil.nextLesserValueToTarget(start, dimensionColumnDataChunk, filterValues[1]); } - // In case the start is less than 0, then positive value of start is pointing to the next - // value of the searched key. So move to the previous one. - if ((ByteUtil.compare(filterValues[1], dimensionColumnDataChunk.getChunkData(start)) - < 0)) { - start = start - 1; + + if (start < 0) { + start = -(start + 1); + if (start == numerOfRows) { + start = start - 1; + } + // In case the start is less than 0, then positive value of start is pointing to the next + // value of the searched key. So move to the previous one. 
+ if ((ByteUtil.compare(filterValues[1], dimensionColumnDataChunk.getChunkData(start)) + < 0)) { + start = start - 1; + } } + endMax = start; + } else { + endMax = numerOfRows - 1; + } + + for (int j = startMin; j <= endMax; j++) { + bitSet.set(j); + } + + // Binary Search cannot be done on '@NU#LL$!", so need to check and compare for null on + // matching row. + if (dimensionColumnDataChunk.isNoDicitionaryColumn()) { + updateForNoDictionaryColumn(startMin, endMax, dimensionColumnDataChunk, bitSet); } - endMax = start; } else { - endMax = numerOfRows - 1; + // evaluate result for lower range value first and then perform and operation in the + // upper range value in order to compute the final result + bitSet = evaluateGreaterThanFilterForUnsortedColumn(dimensionColumnDataChunk, filterValues[0], + numerOfRows); + bitSet.and(evaluateLessThanFilterForUnsortedColumn(dimensionColumnDataChunk, filterValues[1], + numerOfRows)); } + return bitSet; + } - for (int j = startMin; j <= endMax; j++) { - bitSet.set(j); + /** + * This method will evaluate the result for filter column based on the lower range value + * + * @param dimensionColumnDataChunk + * @param filterValue + * @param numberOfRows + * @return + */ + private BitSet evaluateGreaterThanFilterForUnsortedColumn( + DimensionColumnDataChunk dimensionColumnDataChunk, byte[] filterValue, int numberOfRows) { + BitSet bitSet = new BitSet(numberOfRows); + if (greaterThanExp) { + for (int i = 0; i < numberOfRows; i++) { + if ((ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValue) > 0)) { + bitSet.set(i); + } + } + } else if (greaterThanEqualExp) { + for (int i = 0; i < numberOfRows; i++) { + if ((ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValue) >= 0)) { + bitSet.set(i); + } + } } + return bitSet; + } - // Binary Search cannot be done on '@NU#LL$!", so need to check and compare for null on - // matching row. 
- if (dimensionColumnDataChunk.isNoDicitionaryColumn()) { - updateForNoDictionaryColumn(startMin, endMax, dimensionColumnDataChunk, bitSet); + /** + * This method will evaluate the result for filter column based on the upper range value + * + * @param dimensionColumnDataChunk + * @param filterValue + * @param numberOfRows + * @return + */ + private BitSet evaluateLessThanFilterForUnsortedColumn( + DimensionColumnDataChunk dimensionColumnDataChunk, byte[] filterValue, int numberOfRows) { --- End diff -- Because we store @NU#LL$! for null values in no-dictionary columns, this method needs an extra step: after collecting the rows that are less than (or less than or equal to) the filter value, we need to check whether any null values are present. If they are, we need to remove them from the bitset.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes to enable it, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---