Use binarySearch instead of the for loop over filter values to improve performance
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/735e4777 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/735e4777 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/735e4777 Branch: refs/heads/branch-1.1 Commit: 735e4777a13cbc815625c477cfd23ca40d008790 Parents: 9d16d50 Author: mayun <simafengyun1...@163.com> Authored: Wed May 24 14:04:43 2017 +0800 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Thu Jun 15 13:06:35 2017 +0530 ---------------------------------------------------------------------- .../executer/ExcludeFilterExecuterImpl.java | 13 +++- .../executer/ExcludeFilterExecuterImplTest.java | 63 ++++++++++++++++++++ .../executer/IncludeFilterExecuterImplTest.java | 16 ++--- 3 files changed, 82 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/735e4777/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java index 8e7a3c2..7449781 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java @@ -141,14 +141,23 @@ public class ExcludeFilterExecuterImpl implements FilterExecuter { return bitSet; } + // use binary search to replace for clause private BitSet setFilterdIndexToBitSet(FixedLengthDimensionDataChunk dimColumnDataChunk, int numerOfRows) { BitSet bitSet = new BitSet(numerOfRows); bitSet.flip(0, numerOfRows); byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys(); - 
for (int k = 0; k < filterValues.length; k++) { + if (filterValues.length > 1) { for (int j = 0; j < numerOfRows; j++) { - if (dimColumnDataChunk.compareTo(j, filterValues[k]) == 0) { + int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1, + dimColumnDataChunk.getChunkData(j)); + if (index >= 0) { + bitSet.flip(j); + } + } + } else if (filterValues.length == 1) { + for (int j = 0; j < numerOfRows; j++) { + if (dimColumnDataChunk.compareTo(j, filterValues[0]) == 0) { bitSet.flip(j); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/735e4777/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImplTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImplTest.java b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImplTest.java new file mode 100644 index 0000000..e3ae42c --- /dev/null +++ b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImplTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.core.scan.filter.executer; + +import java.util.BitSet; + +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.util.CarbonUtil; + +public class ExcludeFilterExecuterImplTest extends IncludeFilterExecuterImplTest { + + @Override public BitSet setFilterdIndexToBitSetNew(DimensionColumnDataChunk dimColumnDataChunk, + int numerOfRows, byte[][] filterValues) { + BitSet bitSet = new BitSet(numerOfRows); + bitSet.flip(0, numerOfRows); + // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys(); + if (filterValues.length > 1) { + for (int j = 0; j < numerOfRows; j++) { + int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1, + dimColumnDataChunk.getChunkData(j)); + if (index >= 0) { + bitSet.flip(j); + } + } + } else if (filterValues.length == 1) { + for (int j = 0; j < numerOfRows; j++) { + if (dimColumnDataChunk.compareTo(j, filterValues[0]) == 0) { + bitSet.flip(j); + } + } + } + return bitSet; + } + + @Override public BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimColumnDataChunk, + int numerOfRows, byte[][] filterValues) { + BitSet bitSet = new BitSet(numerOfRows); + bitSet.flip(0, numerOfRows); + // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys(); + for (int k = 0; k < filterValues.length; k++) { + for (int j = 0; j < numerOfRows; j++) { + if (dimColumnDataChunk.compareTo(j, filterValues[k]) == 0) { + bitSet.flip(j); + } + } + } + return bitSet; + } +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/735e4777/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java index 
87b9c2d..404f77f 100644 --- a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java +++ b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java @@ -36,7 +36,7 @@ public class IncludeFilterExecuterImplTest extends TestCase { } - private BitSet setFilterdIndexToBitSetNew(DimensionColumnDataChunk dimensionColumnDataChunk, + public BitSet setFilterdIndexToBitSetNew(DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows, byte[][] filterValues) { BitSet bitSet = new BitSet(numerOfRows); if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) { @@ -60,7 +60,7 @@ public class IncludeFilterExecuterImplTest extends TestCase { return bitSet; } - private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows, + public BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows, byte[][] filterValues) { BitSet bitSet = new BitSet(numerOfRows); if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) { @@ -99,8 +99,8 @@ public class IncludeFilterExecuterImplTest extends TestCase { @Test public void testPerformance() { - // dimension's data number in a blocklet, usually default is 120000 - int dataChunkSize = 120000; + // dimension's data number in a blocklet, usually default is 32000 + int dataChunkSize = 32000; // repeat query times in the test int queryTimes = 5; // repeated times for a dictionary value @@ -122,8 +122,8 @@ public class IncludeFilterExecuterImplTest extends TestCase { @Test public void testBoundary() { - // dimension's data number in a blocklet, usually default is 120000 - int dataChunkSize = 120000; + // dimension's data number in a blocklet, usually default is 32000 + int dataChunkSize = 32000; // repeat query times in the test int queryTimes = 5; // repeated times for a dictionary value @@ -268,8 +268,8 @@ public class 
IncludeFilterExecuterImplTest extends TestCase { long start; long end; - // dimension's data number in a blocklet, usually default is 120000 - int dataChunkSize = 120000; + // dimension's data number in a blocklet, usually default is 32000 + int dataChunkSize = 32000; // repeat query times in the test int queryTimes = 10000; // repeated times for a dictionary value