HIVE-17147: Vectorization: Add code for testing MapJoin operator in isolation and measuring its performance with JMH (Matt McCline via Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3c7fb2a9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3c7fb2a9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3c7fb2a9 Branch: refs/heads/master Commit: 3c7fb2a976ad363a2f96588d0c4880f682f0b855 Parents: 88da238 Author: Prasanth Jayachandran <prasan...@apache.org> Authored: Tue Jul 25 02:49:01 2017 -0700 Committer: Prasanth Jayachandran <prasan...@apache.org> Committed: Tue Jul 25 02:49:01 2017 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 + itests/hive-jmh/pom.xml | 6 + .../vectorization/mapjoin/AbstractMapJoin.java | 179 +++++ .../mapjoin/MapJoinMultiKeyBench.java | 313 +++++++++ .../mapjoin/MapJoinMultiKeyBenchBase.java | 68 ++ .../mapjoin/MapJoinOneLongKeyBench.java | 313 +++++++++ .../mapjoin/MapJoinOneLongKeyBenchBase.java | 66 ++ .../mapjoin/MapJoinOneStringKeyBench.java | 313 +++++++++ .../mapjoin/MapJoinOneStringKeyBenchBase.java | 66 ++ .../hadoop/hive/ql/exec/MapJoinOperator.java | 18 + .../hive/ql/exec/spark/HashTableLoader.java | 2 +- .../hive/ql/exec/vector/VectorBatchDebug.java | 105 +++ .../mapjoin/VectorMapJoinCommonOperator.java | 35 +- .../fast/VectorMapJoinFastTableContainer.java | 8 +- .../VectorMapJoinOptimizedCreateHashTable.java | 6 +- .../hive/ql/optimizer/physical/Vectorizer.java | 26 +- .../apache/hadoop/hive/ql/plan/MapJoinDesc.java | 6 +- .../hadoop/hive/ql/plan/VectorMapJoinDesc.java | 24 +- .../hive/ql/exec/util/DescriptionTest.java | 31 + .../CollectorTestOperator.java | 47 ++ .../CountCollectorTestOperator.java | 48 ++ .../CountVectorCollectorTestOperator.java | 47 ++ .../RowCollectorTestOperator.java | 52 ++ .../RowCollectorTestOperatorBase.java | 32 + .../RowVectorCollectorTestOperator.java | 63 ++ .../ql/exec/util/rowobjects/RowTestObjects.java | 100 +++ .../exec/util/rowobjects/RowTestObjectsMap.java | 77 +++ 
.../util/rowobjects/RowTestObjectsMultiSet.java | 91 +++ .../exec/vector/mapjoin/MapJoinTestConfig.java | 658 +++++++++++++++++++ .../ql/exec/vector/mapjoin/MapJoinTestData.java | 272 ++++++++ .../vector/mapjoin/MapJoinTestDescription.java | 239 +++++++ .../vector/mapjoin/TestMapJoinOperator.java | 549 ++++++++++++++++ .../util/batchgen/TestVectorBatchGenerate.java | 57 ++ .../batchgen/VectorBatchGenerateStream.java | 63 ++ .../util/batchgen/VectorBatchGenerateUtil.java | 92 +++ .../util/batchgen/VectorBatchGenerator.java | 306 +++++++++ .../batchgen/VectorColumnGroupGenerator.java | 494 ++++++++++++++ 37 files changed, 4834 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3ebe503..3cf76d0 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -560,6 +560,8 @@ public class HiveConf extends Configuration { HIVE_IN_TEZ_TEST("hive.in.tez.test", false, "internal use only, true when in testing tez", true), + HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD("hive.mapjoin.testing.no.hash.table.load", false, "internal use only, true when in testing map join", + true), LOCALMODEAUTO("hive.exec.mode.local.auto", false, "Let Hive determine whether to run in local mode automatically"), http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/pom.xml ---------------------------------------------------------------------- diff --git a/itests/hive-jmh/pom.xml b/itests/hive-jmh/pom.xml index af8eb19..0ff584c 100644 --- a/itests/hive-jmh/pom.xml +++ b/itests/hive-jmh/pom.xml @@ -56,7 +56,13 @@ <dependency> 
<groupId>org.apache.hive</groupId> <artifactId>hive-exec</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-exec</artifactId> <version>${project.version}</version> + <classifier>tests</classifier> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java ---------------------------------------------------------------------- diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java new file mode 100644 index 0000000..324f562 --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java @@ -0,0 +1,179 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hive.benchmark.vectorization.mapjoin; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateUtil; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; 
+import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.util.Random; +import java.util.concurrent.TimeUnit; + +// UNDONE: For now, just run once cold. +@BenchmarkMode(Mode.SingleShotTime) +@Fork(1) +@State(Scope.Thread) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +public abstract class AbstractMapJoin { + protected VectorMapJoinVariation vectorMapJoinVariation; + protected MapJoinTestImplementation mapJoinImplementation; + protected MapJoinTestDescription testDesc; + protected MapJoinTestData testData; + + protected MapJoinOperator operator; + + protected boolean isVectorOutput; + + protected Object[][] bigTableRows; + + protected VectorizedRowBatch[] bigTableBatches; + + @Benchmark + // @Warmup(iterations = 0, time = 1, timeUnit = TimeUnit.MILLISECONDS) + @Measurement(iterations = 1, time = 1, timeUnit = TimeUnit.MILLISECONDS) + public void bench() throws Exception { + if (!isVectorOutput) { + executeBenchmarkImplementationRow(mapJoinImplementation, testDesc, testData, operator, bigTableRows); + } else { + executeBenchmarkImplementationVector(mapJoinImplementation, testDesc, testData, operator, bigTableBatches); + } + } + + protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, + VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation, + String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, + String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, + int[] bigTableRetainColumnNums, + int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters) throws Exception { + + this.vectorMapJoinVariation = 
vectorMapJoinVariation; + this.mapJoinImplementation = mapJoinImplementation; + testDesc = new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueColumnNames, smallTableValueTypeInfos, + bigTableRetainColumnNums, + smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, + smallTableGenerationParameters); + + // Prepare data. Good for ANY implementation variation. + testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + operator = setupBenchmarkImplementation( + mapJoinImplementation, testDesc, testData); + + isVectorOutput = isVectorOutput(mapJoinImplementation); + + /* + * We don't measure data generation execution cost -- generate the big table into memory first. + */ + if (!isVectorOutput) { + + bigTableRows = VectorBatchGenerateUtil.generateRowObjectArray( + testDesc.bigTableKeyTypeInfos, testData.getBigTableBatchStream(), + testData.getBigTableBatch(), testDesc.outputObjectInspectors); + + } else { + + bigTableBatches = VectorBatchGenerateUtil.generateBatchArray( + testData.getBigTableBatchStream(), testData.getBigTableBatch()); + + } + } + + private static boolean isVectorOutput(MapJoinTestImplementation mapJoinImplementation) { + return + (mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_HASH_MAP && + mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + + protected static MapJoinOperator setupBenchmarkImplementation( + MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, + MapJoinTestData testData) + throws Exception { + + // UNDONE: Parameterize for implementation variation? + MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc); + + final boolean isVectorOutput = isVectorOutput(mapJoinImplementation); + + // This collector is just a row counter. + Operator<? extends OperatorDesc> testCollectorOperator = + (!isVectorOutput ? 
new CountCollectorTestOperator() : + new CountVectorCollectorTestOperator()); + + MapJoinOperator operator = + MapJoinTestConfig.createMapJoinImplementation( + mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc); + return operator; + } + + private static void executeBenchmarkImplementationRow( + MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, + MapJoinTestData testData, MapJoinOperator operator, Object[][] bigTableRows) + throws Exception { + + final int size = bigTableRows.length; + for (int i = 0; i < size; i++) { + operator.process(bigTableRows[i], 0); + } + operator.closeOp(false); + } + + private static void executeBenchmarkImplementationVector( + MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, + MapJoinTestData testData, MapJoinOperator operator, VectorizedRowBatch[] bigTableBatches) + throws Exception { + + final int size = bigTableBatches.length; + for (int i = 0; i < size; i++) { + operator.process(bigTableBatches[i], 0); + } + operator.closeOp(false); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java ---------------------------------------------------------------------- diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java new file mode 100644 index 0000000..f183bb5 --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java @@ -0,0 +1,313 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.benchmark.vectorization.mapjoin; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; +import 
org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.JoinCondDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hive.benchmark.vectorization.VectorizedArithmeticBench; +import org.apache.hive.common.util.HashCodeUtil; +import org.apache.hive.common.util.ReflectionUtil; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import 
java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Random; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.profile.StackProfiler; + +/* + * Simple multi-key map join benchmarks. + * + * Build with "mvn clean install -DskipTests -Pdist,itests" at main hive directory. + * + * From itests/hive-jmh directory, run: + * java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization.mapjoin.MapJoinMultiKeyBench + * + * {INNER, INNER_BIG_ONLY, LEFT_SEMI, OUTER} + * X + * {ROW_MODE_HASH_MAP, ROW_MODE_OPTIMIZED, VECTOR_PASS_THROUGH, NATIVE_VECTOR_OPTIMIZED, NATIVE_VECTOR_FAST} + * + */ +@State(Scope.Benchmark) +public class MapJoinMultiKeyBench extends AbstractMapJoin { + + public static class MapJoinMultiKeyInnerRowModeHashMapBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinMultiKeyInnerRowModeOptimized_Bench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinMultiKeyInnerVectorPassThrough_Bench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinMultiKeyInnerNativeVectorOptimizedBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void 
setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinMultiKeyInnerNativeVectorFastBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static class MapJoinMultiKeyInnerBigOnlyRowModeHashMapBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinMultiKeyInnerBigOnlyRowModeOptimized_Bench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinMultiKeyInnerBigOnlyVectorPassThrough_Bench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinMultiKeyInnerBigOnlyNativeVectorOptimizedBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinMultiKeyInnerBigOnlyNativeVectorFastBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static class MapJoinMultiKeyLeftSemiRowModeHashMapBench extends 
MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinMultiKeyLeftSemiRowModeOptimized_Bench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinMultiKeyLeftSemiVectorPassThrough_Bench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinMultiKeyLeftSemiNativeVectorOptimizedBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinMultiKeyLeftSemiNativeVectorFastBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static class MapJoinMultiKeyOuterRowModeHashMapBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinMultiKeyOuterRowModeOptimized_Bench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinMultiKeyOuterVectorPassThrough_Bench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + 
doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinMultiKeyOuterNativeVectorOptimizedBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinMultiKeyOuterNativeVectorFastBench extends MapJoinMultiKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(".*" + MapJoinMultiKeyBench.class.getSimpleName() + ".*") + .build(); + new Runner(opt).run(); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java ---------------------------------------------------------------------- diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java new file mode 100644 index 0000000..052b337 --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.benchmark.vectorization.mapjoin; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +public abstract class MapJoinMultiKeyBenchBase extends AbstractMapJoin { + + public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, + MapJoinTestImplementation mapJoinImplementation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + long seed = 2543; + + int rowCount = 100000; // 100,000. 
+ + String[] bigTableColumnNames = new String[] {"b1", "b2", "b3"}; + TypeInfo[] bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.longTypeInfo, + TypeInfoFactory.stringTypeInfo}; + int[] bigTableKeyColumnNums = new int[] {0, 1, 2}; + + String[] smallTableValueColumnNames = new String[] {"sv1"}; + TypeInfo[] smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + + int[] bigTableRetainColumnNums = new int[] {0, 1, 2}; + + int[] smallTableRetainKeyColumnNums = new int[] {}; + int[] smallTableRetainValueColumnNums = new int[] {0}; + + SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.ONLY_ONE); + + setupMapJoin(hiveConf, seed, rowCount, + vectorMapJoinVariation, mapJoinImplementation, + bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + smallTableValueColumnNames, smallTableValueTypeInfos, + bigTableRetainColumnNums, + smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, + smallTableGenerationParameters); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java ---------------------------------------------------------------------- diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java new file mode 100644 index 0000000..8d39953 --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java @@ -0,0 +1,313 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.benchmark.vectorization.mapjoin; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import 
org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.JoinCondDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hive.benchmark.vectorization.VectorizedArithmeticBench; +import org.apache.hive.common.util.HashCodeUtil; +import org.apache.hive.common.util.ReflectionUtil; +import org.junit.Test; + +import 
java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Random; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.profile.StackProfiler; + +/* + * Simple one long key map join benchmarks. + * + * Build with "mvn clean install -DskipTests -Pdist,itests" at main hive directory. + * + * From itests/hive-jmh directory, run: + * java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization.mapjoin.MapJoinOneLongKeyBench + * + * {INNER, INNER_BIG_ONLY, LEFT_SEMI, OUTER} + * X + * {ROW_MODE_HASH_MAP, ROW_MODE_OPTIMIZED, VECTOR_PASS_THROUGH, NATIVE_VECTOR_OPTIMIZED, NATIVE_VECTOR_FAST} + * + */ +@State(Scope.Benchmark) +public class MapJoinOneLongKeyBench extends AbstractMapJoin { + + public static class MapJoinOneLongKeyInnerRowModeHashMapBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinOneLongKeyInnerRowModeOptimized_Bench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinOneLongKeyInnerVectorPassThrough_Bench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, 
MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinOneLongKeyInnerNativeVectorOptimizedBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinOneLongKeyInnerNativeVectorFastBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static class MapJoinOneLongKeyInnerBigOnlyRowModeHashMapBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinOneLongKeyInnerBigOnlyRowModeOptimized_Bench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinOneLongKeyInnerBigOnlyVectorPassThrough_Bench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinOneLongKeyInnerBigOnlyNativeVectorOptimizedBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinOneLongKeyInnerBigOnlyNativeVectorFastBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, 
MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static class MapJoinOneLongKeyLeftSemiRowModeHashMapBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinOneLongKeyLeftSemiRowModeOptimized_Bench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinOneLongKeyLeftSemiVectorPassThrough_Bench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinOneLongKeyLeftSemiNativeVectorOptimizedBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinOneLongKeyLeftSemiNativeVectorFastBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static class MapJoinOneLongKeyOuterRowModeHashMapBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinOneLongKeyOuterRowModeOptimized_Bench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + 
doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinOneLongKeyOuterVectorPassThrough_Bench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinOneLongKeyOuterNativeVectorOptimizedBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinOneLongKeyOuterNativeVectorFastBench extends MapJoinOneLongKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(".*" + MapJoinOneLongKeyBench.class.getSimpleName() + ".*") + .build(); + new Runner(opt).run(); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java ---------------------------------------------------------------------- diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java new file mode 100644 index 0000000..111e551 --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.benchmark.vectorization.mapjoin; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +public abstract class MapJoinOneLongKeyBenchBase extends AbstractMapJoin { + + public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, + MapJoinTestImplementation mapJoinImplementation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + long seed = 2543; + + int rowCount = 10000000; // 10,000,000. 
+ + String[] bigTableColumnNames = new String[] {"number1"}; + TypeInfo[] bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + int[] bigTableKeyColumnNums = new int[] {0}; + + String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; + TypeInfo[] smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo}; + + int[] bigTableRetainColumnNums = new int[] {0}; + + int[] smallTableRetainKeyColumnNums = new int[] {}; + int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + + SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.ONLY_ONE); + + setupMapJoin(hiveConf, seed, rowCount, + vectorMapJoinVariation, mapJoinImplementation, + bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + smallTableValueColumnNames, smallTableValueTypeInfos, + bigTableRetainColumnNums, + smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, + smallTableGenerationParameters); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java ---------------------------------------------------------------------- diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java new file mode 100644 index 0000000..9857ae2 --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java @@ -0,0 +1,313 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.benchmark.vectorization.mapjoin; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorBatchDebug; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; +import 
org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; +import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.JoinCondDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hive.benchmark.vectorization.VectorizedArithmeticBench; +import org.apache.hive.common.util.HashCodeUtil; +import org.apache.hive.common.util.ReflectionUtil; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import 
java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Random; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.profile.StackProfiler; + +/* + * Simple one string key map join benchmarks. + * + * Build with "mvn clean install -DskipTests -Pdist,itests" at main hive directory. + * + * From itests/hive-jmh directory, run: + * java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization.mapjoin.MapJoinOneStringKeyBench + * + * {INNER, INNER_BIG_ONLY, LEFT_SEMI, OUTER} + * X + * {ROW_MODE_HASH_MAP, ROW_MODE_OPTIMIZED, VECTOR_PASS_THROUGH, NATIVE_VECTOR_OPTIMIZED, NATIVE_VECTOR_FAST} + * + */ +@State(Scope.Benchmark) +public class MapJoinOneStringKeyBench extends AbstractMapJoin { + + public static class MapJoinOneStringKeyInnerRowModeHashMapBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinOneStringKeyInnerRowModeOptimized_Bench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinOneStringKeyInnerVectorPassThrough_Bench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinOneStringKeyInnerNativeVectorOptimizedBench extends
MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinOneStringKeyInnerNativeVectorFastBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static class MapJoinOneStringKeyInnerBigOnlyRowModeHashMapBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinOneStringKeyInnerBigOnlyRowModeOptimized_Bench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinOneStringKeyInnerBigOnlyVectorPassThrough_Bench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinOneStringKeyInnerBigOnlyNativeVectorOptimizedBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinOneStringKeyInnerBigOnlyNativeVectorFastBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + 
//----------------------------------------------------------------------------------------------- + + public static class MapJoinOneStringKeyLeftSemiRowModeHashMapBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinOneStringKeyLeftSemiRowModeOptimized_Bench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinOneStringKeyLeftSemiVectorPassThrough_Bench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinOneStringKeyLeftSemiNativeVectorOptimizedBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinOneStringKeyLeftSemiNativeVectorFastBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static class MapJoinOneStringKeyOuterRowModeHashMapBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.ROW_MODE_HASH_MAP); + } + } + + public static class MapJoinOneStringKeyOuterRowModeOptimized_Bench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, 
MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + } + + public static class MapJoinOneStringKeyOuterVectorPassThrough_Bench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.VECTOR_PASS_THROUGH); + } + } + + public static class MapJoinOneStringKeyOuterNativeVectorOptimizedBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); + } + } + + public static class MapJoinOneStringKeyOuterNativeVectorFastBench extends MapJoinOneStringKeyBenchBase { + + @Setup + public void setup() throws Exception { + doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.NATIVE_VECTOR_FAST); + } + } + + //----------------------------------------------------------------------------------------------- + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(".*" + MapJoinOneStringKeyBench.class.getSimpleName() + ".*") + .build(); + new Runner(opt).run(); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java ---------------------------------------------------------------------- diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java new file mode 100644 index 0000000..4becd61 --- /dev/null +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.benchmark.vectorization.mapjoin; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +public abstract class MapJoinOneStringKeyBenchBase extends AbstractMapJoin { + + public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, + MapJoinTestImplementation mapJoinImplementation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + long seed = 2543; + + int rowCount = 100000; // 100,000. 
+ + String[] bigTableColumnNames = new String[] {"b1"}; + TypeInfo[] bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + int[] bigTableKeyColumnNums = new int[] {0}; + + String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; + TypeInfo[] smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + + int[] bigTableRetainColumnNums = new int[] {0}; + + int[] smallTableRetainKeyColumnNums = new int[] {}; + int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + + SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.ONLY_ONE); + + setupMapJoin(hiveConf, seed, rowCount, + vectorMapJoinVariation, mapJoinImplementation, + bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + smallTableValueColumnNames, smallTableValueTypeInfos, + bigTableRetainColumnNums, + smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, + smallTableGenerationParameters); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index 384e664..a1e0bab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -70,6 +70,8 @@ import org.apache.hive.common.util.ReflectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.annotations.VisibleForTesting; + import com.esotericsoftware.kryo.KryoException; /** @@ -100,6 +102,8 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem protected HybridHashTableContainer 
firstSmallTable; // The first small table;
                                                      // Only this table has spilled big table rows
 
+  // Read from HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD in the hunk below.
+  protected transient boolean isTestingNoHashTableLoad;
+
   /** Kryo ctor. */
   protected MapJoinOperator() {
     super();
@@ -165,6 +169,12 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
 
     generateMapMetaData();
 
+    // Testing switch: when the configuration flag is set, return here and skip the rest
+    // of this method.
+    isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf,
+        HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD);
+    if (isTestingNoHashTableLoad) {
+      return;
+    }
+
     final ExecMapperContext mapContext = getExecContext();
     final MapredContext mrContext = MapredContext.get();
 
@@ -239,6 +249,14 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
     }
   }
 
+  /**
+   * Test hook: stores a caller-supplied table container and SerDe directly into
+   * {@code mapJoinTables[posSmallTable]} and {@code mapJoinTableSerdes[posSmallTable]}.
+   * No validation of {@code posSmallTable} or of the arguments is performed here.
+   *
+   * @param posSmallTable index of the slot to overwrite in both arrays
+   * @param testMapJoinTableContainer container stored into {@code mapJoinTables}
+   * @param mapJoinTableContainerSerDe SerDe stored into {@code mapJoinTableSerdes}
+   */
+  @VisibleForTesting
+  public void setTestMapJoinTableContainer(int posSmallTable,
+      MapJoinTableContainer testMapJoinTableContainer,
+      MapJoinTableContainerSerDe mapJoinTableContainerSerDe) {
+    mapJoinTables[posSmallTable] = testMapJoinTableContainer;
+    mapJoinTableSerdes[posSmallTable] = mapJoinTableContainerSerDe;
+  }
+
   @Override
   protected List<ObjectInspector> getValueObjectInspectors(
       byte alias, List<ObjectInspector>[] aliasToObjectInspectors) {

http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
index 3cf6561..3519e1d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
@@ -76,7 +76,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
       if (desc.getVectorMode() && HiveConf.getBoolVar(
           hconf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
         VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc)
desc.getVectorDesc(); - useFastContainer = vectorDesc != null && vectorDesc.hashTableImplementationType() == + useFastContainer = vectorDesc != null && vectorDesc.getHashTableImplementationType() == VectorMapJoinDesc.HashTableImplementationType.FAST; } } http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorBatchDebug.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorBatchDebug.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorBatchDebug.java new file mode 100644 index 0000000..155c9b8 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorBatchDebug.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Debugging helpers that format the rows of a {@link VectorizedRowBatch} as text and print
+ * them to {@code System.err}.
+ */
+public class VectorBatchDebug {
+  private static final Logger LOG = LoggerFactory.getLogger(VectorBatchDebug.class);
+
+  /**
+   * Renders a byte range as printable text.
+   *
+   * Bytes in the range ' ' through '~' are appended as characters. Every other byte is
+   * appended as a backslash followed by its unsigned value in decimal, zero padded to at
+   * least three digits.
+   *
+   * @param bytes source array
+   * @param start index of the first byte to render
+   * @param length number of bytes to render
+   * @return the rendered text
+   */
+  public static String displayBytes(byte[] bytes, int start, int length) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = start; i < start + length; i++) {
+      char ch = (char) bytes[i];
+      if (ch < ' ' || ch > '~') {
+        sb.append(String.format("\\%03d", bytes[i] & 0xff));
+      } else {
+        sb.append(ch);
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Prints one row of the batch to {@code System.err}: the prefix, the row index, and then
+   * each projected column with its value, "NULL", or a marker for a missing column vector
+   * or an unrecognized column vector class.
+   *
+   * @param batch batch to read from
+   * @param index entry of the batch to display; callers iterating a batch that has
+   *        {@code selectedInUse} set pass the value taken from {@code selected}
+   * @param prefix text placed at the start of the output line
+   */
+  public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) {
+    StringBuilder sb = new StringBuilder();
+    sb.append(prefix + " row " + index + " ");
+    for (int p = 0; p < batch.projectionSize; p++) {
+      int column = batch.projectedColumns[p];
+      if (p == column) {
+        sb.append("(col " + p + ") ");
+      } else {
+        sb.append("(proj col " + p + " col " + column + ") ");
+      }
+      ColumnVector colVector = batch.cols[column];
+      if (colVector == null) {
+        sb.append("(null ColumnVector)");
+      } else {
+        boolean isRepeating = colVector.isRepeating;
+        if (isRepeating) {
+          sb.append("(repeating)");
+        }
+        // A repeating column is read at entry 0. Use a per-column local rather than
+        // overwriting the index parameter: overwriting it made every later column in
+        // this row read entry 0 as well, even when that column is not repeating.
+        final int rowIndex = (isRepeating ? 0 : index);
+        if (colVector.noNulls || !colVector.isNull[rowIndex]) {
+          if (colVector instanceof LongColumnVector) {
+            sb.append(((LongColumnVector) colVector).vector[rowIndex]);
+          } else if (colVector instanceof DoubleColumnVector) {
+            sb.append(((DoubleColumnVector) colVector).vector[rowIndex]);
+          } else if (colVector instanceof BytesColumnVector) {
+            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
+            byte[] bytes = bytesColumnVector.vector[rowIndex];
+            int start = bytesColumnVector.start[rowIndex];
+            int length = bytesColumnVector.length[rowIndex];
+            if (bytes == null) {
+              sb.append("(Unexpected null bytes with start " + start + " length " + length + ")");
+            } else {
+              sb.append("bytes: '" + displayBytes(bytes, start, length) + "'");
+            }
+          } else if (colVector instanceof DecimalColumnVector) {
+            sb.append(((DecimalColumnVector) colVector).vector[rowIndex].toString());
+          } else if (colVector instanceof TimestampColumnVector) {
+            Timestamp timestamp = new Timestamp(0);
+            ((TimestampColumnVector) colVector).timestampUpdate(timestamp, rowIndex);
+            sb.append(timestamp.toString());
+          } else if (colVector instanceof IntervalDayTimeColumnVector) {
+            HiveIntervalDayTime intervalDayTime = ((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(rowIndex);
+            sb.append(intervalDayTime.toString());
+          } else {
+            sb.append("Unknown");
+          }
+        } else {
+          sb.append("NULL");
+        }
+      }
+      sb.append(" ");
+    }
+    System.err.println(sb.toString());
+    // LOG.info(sb.toString());
+  }
+
+  /**
+   * Prints every row of the batch via {@link #debugDisplayOneRow}. When
+   * {@code batch.selectedInUse} is set, the row index is taken from {@code batch.selected};
+   * otherwise rows 0 through {@code batch.size - 1} are displayed.
+   *
+   * @param batch batch to display
+   * @param prefix text placed at the start of every output line
+   */
+  public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) {
+    for (int i = 0; i < batch.size; i++) {
+      int index = (batch.selectedInUse ? batch.selected[i] : i);
+      debugDisplayOneRow(batch, index, prefix);
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index 7b8e7ea..25d8c8d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -33,6 +33,8 @@ import org.apache.hadoop.hive.ql.HashTableLoaderFactory;
 import org.apache.hadoop.hive.ql.exec.HashTableLoader;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnMapping;
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
@@ -65,6 +67,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
+import com.google.common.annotations.VisibleForTesting;
+
 import com.google.common.base.Preconditions;
 
 /**
@@ -362,9 +366,9 @@ private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
   @Override
   protected HashTableLoader getHashTableLoader(Configuration hconf) {
     VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) conf.getVectorDesc();
-    HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType();
+    HashTableImplementationType hashTableImplementationType = vectorDesc.getHashTableImplementationType();
     HashTableLoader hashTableLoader;
-    switch (vectorDesc.hashTableImplementationType()) {
+    switch (vectorDesc.getHashTableImplementationType()) {
     case OPTIMIZED:
       // Use the Tez hash table loader.
       hashTableLoader = HashTableLoaderFactory.getLoader(hconf);
@@ -442,9 +446,32 @@ private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
     // setup mapJoinTables and serdes
     super.completeInitializationOp(os);
 
+    // Testing switch: return before setUpHashTable() when the flag is set.
+    if (isTestingNoHashTableLoad) {
+      return;
+    }
+
+    // NOTE(review): this local is assigned but not read before the method ends.
+    MapJoinTableContainer mapJoinTableContainer =
+        mapJoinTables[posSingleVectorMapJoinSmallTable];
+
+    setUpHashTable();
+  }
+
+  /**
+   * Test hook: stores the supplied container into
+   * {@code mapJoinTables[posSingleVectorMapJoinSmallTable]} and then calls
+   * {@code setUpHashTable()}.
+   *
+   * This override does not call the superclass method. The {@code posSmallTable} and
+   * {@code mapJoinTableContainerSerDe} arguments are not used by this body.
+   * NOTE(review): confirm that ignoring them is intended.
+   *
+   * @param posSmallTable not used here
+   * @param testMapJoinTableContainer container installed for the single small table slot
+   * @param mapJoinTableContainerSerDe not used here
+   */
+  @VisibleForTesting
+  @Override
+  public void setTestMapJoinTableContainer(int posSmallTable,
+      MapJoinTableContainer testMapJoinTableContainer,
+      MapJoinTableContainerSerDe mapJoinTableContainerSerDe) {
+
+    mapJoinTables[posSingleVectorMapJoinSmallTable] = testMapJoinTableContainer;
+
+    setUpHashTable();
+  }
+
+  // Switches on the vector desc's hash table implementation type. The case bodies lie
+  // beyond the end of this hunk.
+  private void setUpHashTable() {
+
     VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) conf.getVectorDesc();
-    HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType();
-    switch (vectorDesc.hashTableImplementationType()) {
+    HashTableImplementationType hashTableImplementationType = vectorDesc.getHashTableImplementationType();
+    switch (vectorDesc.getHashTableImplementationType()) {
     case OPTIMIZED:
       {
         // Create our vector map join optimized hash table variation *above* the

http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
index 2fe4b93..90b65c3
100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -95,10 +95,10 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai boolean isOuterJoin = !desc.isNoOuterJoin(); VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); - HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType(); - HashTableKind hashTableKind = vectorDesc.hashTableKind(); - HashTableKeyType hashTableKeyType = vectorDesc.hashTableKeyType(); - boolean minMaxEnabled = vectorDesc.minMaxEnabled(); + HashTableImplementationType hashTableImplementationType = vectorDesc.getHashTableImplementationType(); + HashTableKind hashTableKind = vectorDesc.getHashTableKind(); + HashTableKeyType hashTableKeyType = vectorDesc.getHashTableKeyType(); + boolean minMaxEnabled = vectorDesc.getMinMaxEnabled(); int writeBufferSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE); http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java index 111a6d2..5013798 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java @@ -41,9 +41,9 @@ public class VectorMapJoinOptimizedCreateHashTable { boolean isOuterJoin = !desc.isNoOuterJoin(); VectorMapJoinDesc vectorDesc = 
(VectorMapJoinDesc) desc.getVectorDesc(); - HashTableKind hashTableKind = vectorDesc.hashTableKind(); - HashTableKeyType hashTableKeyType = vectorDesc.hashTableKeyType(); - boolean minMaxEnabled = vectorDesc.minMaxEnabled(); + HashTableKind hashTableKind = vectorDesc.getHashTableKind(); + HashTableKeyType hashTableKeyType = vectorDesc.getHashTableKeyType(); + boolean minMaxEnabled = vectorDesc.getMinMaxEnabled(); VectorMapJoinOptimizedHashTable hashTable = null; http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e9b0a26..13d7730 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -151,7 +151,7 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; @@ -2808,7 +2808,7 @@ public class Vectorizer implements PhysicalPlanResolver { HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE; HashTableKind hashTableKind = HashTableKind.NONE; HashTableKeyType hashTableKeyType = HashTableKeyType.NONE; - OperatorVariation 
operatorVariation = OperatorVariation.NONE; + VectorMapJoinVariation vectorMapJoinVariation = VectorMapJoinVariation.NONE; if (vectorDesc.getIsFastHashTableEnabled()) { hashTableImplementationType = HashTableImplementationType.FAST; @@ -2866,20 +2866,20 @@ public class Vectorizer implements PhysicalPlanResolver { switch (joinType) { case JoinDesc.INNER_JOIN: if (!isInnerBigOnly) { - operatorVariation = OperatorVariation.INNER; + vectorMapJoinVariation = VectorMapJoinVariation.INNER; hashTableKind = HashTableKind.HASH_MAP; } else { - operatorVariation = OperatorVariation.INNER_BIG_ONLY; + vectorMapJoinVariation = VectorMapJoinVariation.INNER_BIG_ONLY; hashTableKind = HashTableKind.HASH_MULTISET; } break; case JoinDesc.LEFT_OUTER_JOIN: case JoinDesc.RIGHT_OUTER_JOIN: - operatorVariation = OperatorVariation.OUTER; + vectorMapJoinVariation = VectorMapJoinVariation.OUTER; hashTableKind = HashTableKind.HASH_MAP; break; case JoinDesc.LEFT_SEMI_JOIN: - operatorVariation = OperatorVariation.LEFT_SEMI; + vectorMapJoinVariation = VectorMapJoinVariation.LEFT_SEMI; hashTableKind = HashTableKind.HASH_SET; break; default: @@ -2894,7 +2894,7 @@ public class Vectorizer implements PhysicalPlanResolver { case SHORT: case INT: case LONG: - switch (operatorVariation) { + switch (vectorMapJoinVariation) { case INNER: opClass = VectorMapJoinInnerLongOperator.class; break; @@ -2908,11 +2908,11 @@ public class Vectorizer implements PhysicalPlanResolver { opClass = VectorMapJoinOuterLongOperator.class; break; default: - throw new HiveException("Unknown operator variation " + operatorVariation); + throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } break; case STRING: - switch (operatorVariation) { + switch (vectorMapJoinVariation) { case INNER: opClass = VectorMapJoinInnerStringOperator.class; break; @@ -2926,11 +2926,11 @@ public class Vectorizer implements PhysicalPlanResolver { opClass = VectorMapJoinOuterStringOperator.class; break; default: - throw new 
HiveException("Unknown operator variation " + operatorVariation); + throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } break; case MULTI_KEY: - switch (operatorVariation) { + switch (vectorMapJoinVariation) { case INNER: opClass = VectorMapJoinInnerMultiKeyOperator.class; break; @@ -2944,7 +2944,7 @@ public class Vectorizer implements PhysicalPlanResolver { opClass = VectorMapJoinOuterMultiKeyOperator.class; break; default: - throw new HiveException("Unknown operator variation " + operatorVariation); + throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } break; default: @@ -2957,7 +2957,7 @@ public class Vectorizer implements PhysicalPlanResolver { vectorDesc.setHashTableImplementationType(hashTableImplementationType); vectorDesc.setHashTableKind(hashTableKind); vectorDesc.setHashTableKeyType(hashTableKeyType); - vectorDesc.setOperatorVariation(operatorVariation); + vectorDesc.setVectorMapJoinVariation(vectorMapJoinVariation); vectorDesc.setMinMaxEnabled(minMaxEnabled); vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo); http://git-wip-us.apache.org/repos/asf/hive/blob/3c7fb2a9/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index 0d8e459..e1b4ae6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -35,7 +35,7 @@ import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation; +import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; /** * Map Join operator Descriptor implementation. @@ -404,7 +404,7 @@ public class MapJoinDesc extends JoinDesc implements Serializable { public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, VectorDesc vectorDesc) { // VectorMapJoinOperator is not native vectorized. - super(vectorDesc, ((VectorMapJoinDesc) vectorDesc).hashTableImplementationType() != HashTableImplementationType.NONE); + super(vectorDesc, ((VectorMapJoinDesc) vectorDesc).getHashTableImplementationType() != HashTableImplementationType.NONE); this.mapJoinDesc = mapJoinDesc; vectorMapJoinDesc = (VectorMapJoinDesc) vectorDesc; vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo(); @@ -539,7 +539,7 @@ public class MapJoinDesc extends JoinDesc implements Serializable { @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableOuterKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List<String> getBigTableOuterKey() { - if (!isNative || vectorMapJoinDesc.operatorVariation() != OperatorVariation.OUTER) { + if (!isNative || vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.OUTER) { return null; } return columnMappingToStringList(vectorMapJoinInfo.getBigTableOuterKeyMapping());