[GitHub] carbondata pull request #952: [CARBONDATA-1094] Wrong results returned by th...

kumarvishal09 Fri, 26 May 2017 02:43:07 -0700

Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/952#discussion_r118667394
  
    --- Diff: 
core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
 ---
    @@ -474,80 +495,142 @@ private BitSet 
setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnD
           int numerOfRows) {
         BitSet bitSet = new BitSet(numerOfRows);
         // if (dimensionColumnDataChunk instanceof 
FixedLengthDimensionDataChunk) {
    -    int start = 0;
    -    int startMin = 0;
    -    int endMax = 0;
    -    int startIndex = 0;
         byte[][] filterValues = this.filterRangesValues;
    -    // For Range expression we expect two values. The First is the Min 
Value and Second is the
    -    // Max value.
    -    if (startBlockMinIsDefaultStart == false) {
    -
    -      start = CarbonUtil
    -          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk,
    -              startIndex, numerOfRows - 1, filterValues[0], 
greaterThanExp);
    +    if (dimensionColumnDataChunk.isExplicitSorted()) {
    +      int start = 0;
    +      int startMin = 0;
    +      int endMax = 0;
    +      int startIndex = 0;
    +      // For Range expression we expect two values. The First is the Min 
Value and Second is the
    +      // Max value.
    +      if (startBlockMinIsDefaultStart == false) {
     
    -      if (greaterThanExp == true && start >= 0) {
             start = CarbonUtil
    -            .nextGreaterValueToTarget(start, dimensionColumnDataChunk, 
filterValues[0],
    -                numerOfRows);
    -      }
    +            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, 
startIndex, numerOfRows - 1,
    +                filterValues[0], greaterThanExp);
     
    -      if (start < 0) {
    -        start = -(start + 1);
    -        if (start == numerOfRows) {
    -          start = start - 1;
    +        if (greaterThanExp == true && start >= 0) {
    +          start = CarbonUtil
    +              .nextGreaterValueToTarget(start, dimensionColumnDataChunk, 
filterValues[0],
    +                  numerOfRows);
             }
    -        // Method will compare the tentative index value after binary 
search, this tentative
    -        // index needs to be compared by the filter member if its >= 
filter then from that
    -        // index the bitset will be considered for filtering process.
    -        if ((ByteUtil.compare(filterValues[0], 
dimensionColumnDataChunk.getChunkData(start)))
    -            > 0) {
    -          start = start + 1;
    +
    +        if (start < 0) {
    +          start = -(start + 1);
    +          if (start == numerOfRows) {
    +            start = start - 1;
    +          }
    +          // Method will compare the tentative index value after binary 
search, this tentative
    +          // index needs to be compared by the filter member if its >= 
filter then from that
    +          // index the bitset will be considered for filtering process.
    +          if ((ByteUtil.compare(filterValues[0], 
dimensionColumnDataChunk.getChunkData(start)))
    +              > 0) {
    +            start = start + 1;
    +          }
             }
    +        startMin = start;
    +      } else {
    +        startMin = startIndex;
           }
    -      startMin = start;
    -    } else {
    -      startMin = startIndex;
    -    }
    -
    -    if (endBlockMaxisDefaultEnd == false) {
    -      start = CarbonUtil
    -          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, 
startIndex, numerOfRows - 1,
    -              filterValues[1], lessThanEqualExp);
     
    -      if (lessThanExp == true && start >= 0) {
    -        start =
    -            CarbonUtil.nextLesserValueToTarget(start, 
dimensionColumnDataChunk, filterValues[1]);
    -      }
    +      if (endBlockMaxisDefaultEnd == false) {
    +        start = CarbonUtil
    +            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, 
startIndex, numerOfRows - 1,
    +                filterValues[1], lessThanEqualExp);
     
    -      if (start < 0) {
    -        start = -(start + 1);
    -        if (start == numerOfRows) {
    -          start = start - 1;
    +        if (lessThanExp == true && start >= 0) {
    +          start =
    +              CarbonUtil.nextLesserValueToTarget(start, 
dimensionColumnDataChunk, filterValues[1]);
             }
    -        // In case the start is less than 0, then positive value of start 
is pointing to the next
    -        // value of the searched key. So move to the previous one.
    -        if ((ByteUtil.compare(filterValues[1], 
dimensionColumnDataChunk.getChunkData(start))
    -            < 0)) {
    -          start = start - 1;
    +
    +        if (start < 0) {
    +          start = -(start + 1);
    +          if (start == numerOfRows) {
    +            start = start - 1;
    +          }
    +          // In case the start is less than 0, then positive value of 
start is pointing to the next
    +          // value of the searched key. So move to the previous one.
    +          if ((ByteUtil.compare(filterValues[1], 
dimensionColumnDataChunk.getChunkData(start))
    +              < 0)) {
    +            start = start - 1;
    +          }
             }
    +        endMax = start;
    +      } else {
    +        endMax = numerOfRows - 1;
    +      }
    +
    +      for (int j = startMin; j <= endMax; j++) {
    +        bitSet.set(j);
    +      }
    +
    +      // Binary Search cannot be done on '@NU#LL$!", so need to check and 
compare for null on
    +      // matching row.
    +      if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
    +        updateForNoDictionaryColumn(startMin, endMax, 
dimensionColumnDataChunk, bitSet);
           }
    -      endMax = start;
         } else {
    -      endMax = numerOfRows - 1;
    +      // evaluate result for lower range value first and then perform and 
operation in the
    +      // upper range value in order to compute the final result
    +      bitSet = 
evaluateGreaterThanFilterForUnsortedColumn(dimensionColumnDataChunk, 
filterValues[0],
    +          numerOfRows);
    +      
bitSet.and(evaluateLessThanFilterForUnsortedColumn(dimensionColumnDataChunk, 
filterValues[1],
    +          numerOfRows));
         }
    +    return bitSet;
    +  }
     
    -    for (int j = startMin; j <= endMax; j++) {
    -      bitSet.set(j);
    +  /**
    +   * This method will evaluate the result for filter column based on the 
lower range value
    +   *
    +   * @param dimensionColumnDataChunk
    +   * @param filterValue
    +   * @param numberOfRows
    +   * @return
    +   */
    +  private BitSet evaluateGreaterThanFilterForUnsortedColumn(
    +      DimensionColumnDataChunk dimensionColumnDataChunk, byte[] 
filterValue, int numberOfRows) {
    +    BitSet bitSet = new BitSet(numberOfRows);
    +    if (greaterThanExp) {
    +      for (int i = 0; i < numberOfRows; i++) {
    +        if ((ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), 
filterValue) > 0)) {
    +          bitSet.set(i);
    +        }
    +      }
    +    } else if (greaterThanEqualExp) {
    +      for (int i = 0; i < numberOfRows; i++) {
    +        if ((ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), 
filterValue) >= 0)) {
    +          bitSet.set(i);
    +        }
    +      }
         }
    +    return bitSet;
    +  }
     
    -    // Binary Search cannot be done on '@NU#LL$!", so need to check and 
compare for null on
    -    // matching row.
    -    if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
    -      updateForNoDictionaryColumn(startMin, endMax, 
dimensionColumnDataChunk, bitSet);
    +  /**
    +   * This method will evaluate the result for filter column based on the 
upper range value
    +   *
    +   * @param dimensionColumnDataChunk
    +   * @param filterValue
    +   * @param numberOfRows
    +   * @return
    +   */
    +  private BitSet evaluateLessThanFilterForUnsortedColumn(
    +      DimensionColumnDataChunk dimensionColumnDataChunk, byte[] 
filterValue, int numberOfRows) {
    --- End diff --
    
    Since we store @NU#LL$! for null values in no-dictionary columns, in this 
method, after collecting the rows that are less than (or less than or equal to) 
the filter value, we need to check whether null values are present and, if they 
are, remove them from the bitset.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

[GitHub] carbondata pull request #952: [CARBONDATA-1094] Wrong results returned by th...

Reply via email to