Use binary search instead of a nested for loop to improve exclude-filter performance

Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/735e4777
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/735e4777
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/735e4777

Branch: refs/heads/branch-1.1
Commit: 735e4777a13cbc815625c477cfd23ca40d008790
Parents: 9d16d50
Author: mayun <simafengyun1...@163.com>
Authored: Wed May 24 14:04:43 2017 +0800
Committer: ravipesala <ravi.pes...@gmail.com>
Committed: Thu Jun 15 13:06:35 2017 +0530

----------------------------------------------------------------------
 .../executer/ExcludeFilterExecuterImpl.java     | 13 +++-
 .../executer/ExcludeFilterExecuterImplTest.java | 63 ++++++++++++++++++++
 .../executer/IncludeFilterExecuterImplTest.java | 16 ++---
 3 files changed, 82 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/735e4777/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
index 8e7a3c2..7449781 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
@@ -141,14 +141,23 @@ public class ExcludeFilterExecuterImpl implements 
FilterExecuter {
     return bitSet;
   }
 
+  // binary-search the filter keys for each row instead of looping over every key
   private BitSet setFilterdIndexToBitSet(FixedLengthDimensionDataChunk 
dimColumnDataChunk,
       int numerOfRows) {
     BitSet bitSet = new BitSet(numerOfRows);
     bitSet.flip(0, numerOfRows);
     byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
-    for (int k = 0; k < filterValues.length; k++) {
+    if (filterValues.length > 1) {
       for (int j = 0; j < numerOfRows; j++) {
-        if (dimColumnDataChunk.compareTo(j, filterValues[k]) == 0) {
+        int index = CarbonUtil.binarySearch(filterValues, 0, 
filterValues.length - 1,
+            dimColumnDataChunk.getChunkData(j));
+        if (index >= 0) {
+          bitSet.flip(j);
+        }
+      }
+    } else if (filterValues.length == 1) {
+      for (int j = 0; j < numerOfRows; j++) {
+        if (dimColumnDataChunk.compareTo(j, filterValues[0]) == 0) {
           bitSet.flip(j);
         }
       }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/735e4777/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImplTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImplTest.java
 
b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImplTest.java
new file mode 100644
index 0000000..e3ae42c
--- /dev/null
+++ 
b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImplTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.scan.filter.executer;
+
+import java.util.BitSet;
+
+import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
+import org.apache.carbondata.core.util.CarbonUtil;
+
+public class ExcludeFilterExecuterImplTest extends 
IncludeFilterExecuterImplTest {
+
+ @Override public BitSet setFilterdIndexToBitSetNew(DimensionColumnDataChunk 
dimColumnDataChunk,
+     int numerOfRows, byte[][] filterValues) {
+   BitSet bitSet = new BitSet(numerOfRows);
+   bitSet.flip(0, numerOfRows);
+   // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
+   if (filterValues.length > 1) {
+     for (int j = 0; j < numerOfRows; j++) {
+       int index = CarbonUtil.binarySearch(filterValues, 0, 
filterValues.length - 1,
+           dimColumnDataChunk.getChunkData(j));
+       if (index >= 0) {
+         bitSet.flip(j);
+       }
+     }
+   } else if (filterValues.length == 1) {
+     for (int j = 0; j < numerOfRows; j++) {
+       if (dimColumnDataChunk.compareTo(j, filterValues[0]) == 0) {
+         bitSet.flip(j);
+       }
+     }
+   }
+   return bitSet;
+ }
+
+ @Override public BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk 
dimColumnDataChunk,
+      int numerOfRows, byte[][] filterValues) {
+    BitSet bitSet = new BitSet(numerOfRows);
+    bitSet.flip(0, numerOfRows);
+    // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
+    for (int k = 0; k < filterValues.length; k++) {
+      for (int j = 0; j < numerOfRows; j++) {
+        if (dimColumnDataChunk.compareTo(j, filterValues[k]) == 0) {
+          bitSet.flip(j);
+        }
+      }
+    }
+    return bitSet;
+  }
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/735e4777/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
 
b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
index 87b9c2d..404f77f 100644
--- 
a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
+++ 
b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
@@ -36,7 +36,7 @@ public class IncludeFilterExecuterImplTest extends TestCase {
 
   }
 
-  private BitSet setFilterdIndexToBitSetNew(DimensionColumnDataChunk 
dimensionColumnDataChunk,
+  public BitSet setFilterdIndexToBitSetNew(DimensionColumnDataChunk 
dimensionColumnDataChunk,
       int numerOfRows, byte[][] filterValues) {
     BitSet bitSet = new BitSet(numerOfRows);
     if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
@@ -60,7 +60,7 @@ public class IncludeFilterExecuterImplTest extends TestCase {
     return bitSet;
   }
 
-  private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk 
dimensionColumnDataChunk, int numerOfRows,
+  public BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk 
dimensionColumnDataChunk, int numerOfRows,
       byte[][] filterValues) {
     BitSet bitSet = new BitSet(numerOfRows);
     if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
@@ -99,8 +99,8 @@ public class IncludeFilterExecuterImplTest extends TestCase {
   @Test
   public void testPerformance() {
 
-    // dimension's data number in a blocklet, usually default is 120000
-    int dataChunkSize = 120000; 
+    // dimension's data number in a blocklet, usually default is 32000
+    int dataChunkSize = 32000; 
     //  repeat query times in the test
     int queryTimes = 5;    
     // repeated times for a dictionary value
@@ -122,8 +122,8 @@ public class IncludeFilterExecuterImplTest extends TestCase 
{
   @Test
   public void testBoundary() {
 
-       // dimension's data number in a blocklet, usually default is 120000
-    int dataChunkSize = 120000; 
+       // dimension's data number in a blocklet, usually default is 32000
+    int dataChunkSize = 32000; 
     //  repeat query times in the test
     int queryTimes = 5;    
     // repeated times for a dictionary value
@@ -268,8 +268,8 @@ public class IncludeFilterExecuterImplTest extends TestCase 
{
     long start;
     long end;
     
-    // dimension's data number in a blocklet, usually default is 120000
-    int dataChunkSize = 120000; 
+    // dimension's data number in a blocklet, usually default is 32000
+    int dataChunkSize = 32000; 
     //  repeat query times in the test
     int queryTimes = 10000;    
     // repeated times for a dictionary value

Reply via email to