http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 0ad6816..9938fb0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -389,6 +389,9 @@ public class VectorizationContext { public static final Pattern listTypePattern = Pattern.compile("array.*", Pattern.CASE_INSENSITIVE); + public static final Pattern mapTypePattern = Pattern.compile("map.*", + Pattern.CASE_INSENSITIVE); + //Map column number to type private OutputColumnManager ocm; @@ -3265,6 +3268,8 @@ public class VectorizationContext { return "Struct"; case LIST: return "List"; + case MAP: + return "Map"; default: throw new HiveException("Unexpected hive type name " + hiveTypeName); }
http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java new file mode 100644 index 0000000..44ebbf3 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Superclass to support vectorized functions that take a parameter as key of Map + * and return the value of Map. 
+ */
+public abstract class VectorUDFMapIndexBase extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  public VectorUDFMapIndexBase() {
+    super();
+  }
+
+  public VectorUDFMapIndexBase(int outputColumnNum) {
+    super(outputColumnNum);
+  }
+
+  /**
+   * Computes, for every row of the batch, where the looked-up key sits inside that row's map.
+   *
+   * <p>For each processed row the key returned by {@link #getCurrentKey(int)} is compared with
+   * the keys of the row's map entries. The result array is indexed by row number; each entry is
+   * the position of the first matching key relative to the start of the row's entries (callers
+   * add {@code mapV.offsets[row]} to address {@code mapV.values}), or -1 when no key matches.
+   *
+   * <p>When {@code mapV} is repeating, only its entry 0 is read, every processed row is still
+   * resolved against it (the key may differ per row), and entry 0 of the result is always filled
+   * because callers read it for the repeating case.
+   *
+   * @param mapV the map column to search
+   * @param batch the batch supplying size and row selection
+   * @return per-row relative index of the matching key, or -1
+   */
+  protected int[] getMapValueIndex(MapColumnVector mapV, VectorizedRowBatch batch) {
+    // Row numbers index this array directly, so make it at least as long as the per-row arrays
+    // of the map vector instead of assuming the default batch size.
+    int[] indexArray = new int[Math.max(VectorizedRowBatch.DEFAULT_SIZE, mapV.isNull.length)];
+    for (int i = 0; i < batch.size; i++) {
+      int row = (batch.selectedInUse) ? batch.selected[i] : i;
+      // A repeating vector keeps its single valid entry at position 0.
+      int mapRow = mapV.isRepeating ? 0 : row;
+      indexArray[row] = findKeyIndex(mapV, mapRow, getCurrentKey(row));
+    }
+    if (mapV.isRepeating) {
+      // Row 0 may not be part of the selection, but callers read result entry 0 when the map is
+      // repeating, so resolve it explicitly.
+      indexArray[0] = findKeyIndex(mapV, 0, getCurrentKey(0));
+    }
+    return indexArray;
+  }
+
+  /**
+   * Returns the position, relative to the start of the entries of {@code mapRow}, of the first
+   * key equal to {@code key}; -1 when the map row is NULL or holds no such key.
+   */
+  private int findKeyIndex(MapColumnVector mapV, int mapRow, Object key) {
+    if (!mapV.noNulls && mapV.isNull[mapRow]) {
+      // Do not trust offsets/lengths of a NULL row.
+      return -1;
+    }
+    int start = (int) mapV.offsets[mapRow];
+    for (int j = 0; j < mapV.lengths[mapRow]; j++) {
+      if (compareKey(key, getKeyByIndex(mapV.keys, start + j))) {
+        return j;
+      }
+    }
+    return -1;
+  }
+
+  /**
+   * Null-safe key comparison: two nulls are equal, a null never equals a non-null, and two
+   * non-null keys are compared with {@link #compareKeyInternal(Object, Object)}.
+   */
+  protected boolean compareKey(Object columnKey, Object otherKey) {
+    if (columnKey == null && otherKey == null) {
+      return true;
+    } else if (columnKey != null && otherKey != null) {
+      return compareKeyInternal(columnKey, otherKey);
+    } else {
+      return false;
+    }
+  }
+
+  /**
+   * Compares two non-null keys with {@code equals}; subclasses override this when the key
+   * representation needs a different comparison.
+   */
+  protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
+    return columnKey.equals(otherKey);
+  }
+
+  /** Reads the key stored at position {@code index} of the given key vector. */
+  abstract Object getKeyByIndex(ColumnVector cv, int index);
+
+  /** Returns the key to look up for the row {@code index}. */
+  abstract Object getCurrentKey(int index);
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java new file mode 100644 index 0000000..b55bd98 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Superclass to support vectorized functions that take a column value as key of Map + * and return the value of Map. 
+ */
+public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
+
+  private static final long serialVersionUID = 1L;
+
+  private int mapColumnNum;
+  private int indexColumnNum;
+  // Key column of the batch currently being evaluated; assigned at the start of evaluate().
+  private ColumnVector indexColumnVector;
+
+  public VectorUDFMapIndexBaseCol() {
+    super();
+  }
+
+  public VectorUDFMapIndexBaseCol(int mapColumnNum, int indexColumnNum, int outputColumnNum) {
+    super(outputColumnNum);
+    this.mapColumnNum = mapColumnNum;
+    this.indexColumnNum = indexColumnNum;
+  }
+
+  /**
+   * Writes, for every row of the batch, the map value whose key equals the row's value in the
+   * key column; the output is NULL when the map is NULL or the key is not found.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    ColumnVector outV = batch.cols[outputColumnNum];
+    MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum];
+    // indexColumnVector includes the keys of Map
+    indexColumnVector = batch.cols[indexColumnNum];
+
+    outV.noNulls = true;
+    int[] mapValueIndex;
+    if (mapV.isRepeating) {
+      if (mapV.isNull[0]) {
+        // one NULL map for the whole batch: the output is a repeating NULL
+        outV.isNull[0] = true;
+        outV.noNulls = false;
+        outV.isRepeating = true;
+      } else {
+        mapValueIndex = getMapValueIndex(mapV, batch);
+        if (indexColumnVector.isRepeating) {
+          // the key is not found in MapColumnVector, set the output as null ColumnVector
+          if (mapValueIndex[0] == -1) {
+            outV.isNull[0] = true;
+            outV.noNulls = false;
+          } else {
+            // the key is found in MapColumnVector, set the value
+            outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
+            outV.isNull[0] = false;
+            outV.noNulls = true;
+          }
+          outV.isRepeating = true;
+        } else {
+          setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex);
+        }
+      }
+    } else {
+      mapValueIndex = getMapValueIndex(mapV, batch);
+      setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex);
+    }
+  }
+
+  /**
+   * Set the output based on the index array of MapColumnVector.
+   *
+   * <p>The map vector may itself be repeating (same map, different key per row); in that case
+   * its null flag and offset are read from entry 0, the only valid entry of a repeating vector.
+   */
+  private void setUnRepeatingOutVector(VectorizedRowBatch batch, MapColumnVector mapV,
+      ColumnVector outV, int[] mapValueIndex) {
+    for (int i = 0; i < batch.size; i++) {
+      int j = (batch.selectedInUse) ? batch.selected[i] : i;
+      int mapRow = mapV.isRepeating ? 0 : j;
+      if (mapV.isNull[mapRow] || mapValueIndex[j] == -1) {
+        outV.isNull[j] = true;
+        outV.noNulls = false;
+      } else {
+        outV.setElement(j, (int) (mapV.offsets[mapRow] + mapValueIndex[j]), mapV.values);
+        outV.isNull[j] = false;
+      }
+    }
+    outV.isRepeating = false;
+  }
+
+  /**
+   * Returns the key of row {@code index} from the key column, or {@code null} when that key is
+   * NULL. A null key matches no map entry, so the lookup then yields NULL instead of comparing
+   * against whatever happens to be stored in the NULL slot. A repeating key column is read at
+   * entry 0.
+   */
+  @Override
+  protected Object getCurrentKey(int index) {
+    int keyRow = indexColumnVector.isRepeating ? 0 : index;
+    if (!indexColumnVector.noNulls && indexColumnVector.isNull[keyRow]) {
+      return null;
+    }
+    return getKeyByIndex(indexColumnVector, keyRow);
+  }
+
+  public int getMapColumnNum() {
+    return mapColumnNum;
+  }
+
+  public int getIndexColumnNum() {
+    return indexColumnNum;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
new file mode 100644
index 0000000..512f6eb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Superclass to support vectorized functions that take a scalar as key of Map
+ * and return the value of Map.
+ */
+public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase {
+
+  private static final long serialVersionUID = 1L;
+
+  private int mapColumnNum;
+
+  public VectorUDFMapIndexBaseScalar() {
+    super();
+  }
+
+  public VectorUDFMapIndexBaseScalar(int mapColumnNum, int outputColumnNum) {
+    super(outputColumnNum);
+    this.mapColumnNum = mapColumnNum;
+  }
+
+  /**
+   * Writes, for every row of the batch, the map value stored under the scalar key; the output
+   * is NULL when the map is NULL or does not contain the key. A repeating map produces a
+   * repeating output.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    ColumnVector outV = batch.cols[outputColumnNum];
+    MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum];
+
+    outV.noNulls = true;
+    int[] mapValueIndex;
+    if (mapV.isRepeating) {
+      if (mapV.isNull[0]) {
+        outV.isNull[0] = true;
+        outV.noNulls = false;
+      } else {
+        mapValueIndex = getMapValueIndex(mapV, batch);
+        if (mapValueIndex[0] == -1) {
+          // the key is not found in MapColumnVector, set the output as null ColumnVector
+          outV.isNull[0] = true;
+          outV.noNulls = false;
+        } else {
+          // the key is found in MapColumnVector, set the value
+          outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
+          // Clear the flag explicitly, as the non-repeating branch does: the output vector may
+          // still carry a null flag written for an earlier batch.
+          outV.isNull[0] = false;
+          outV.noNulls = true;
+        }
+      }
+      outV.isRepeating = true;
+    } else {
+      mapValueIndex = getMapValueIndex(mapV, batch);
+      for (int i = 0; i < batch.size; i++) {
+        int j = (batch.selectedInUse) ? batch.selected[i] : i;
+        if (mapV.isNull[j] || mapValueIndex[j] == -1) {
+          outV.isNull[j] = true;
+          outV.noNulls = false;
+        } else {
+          outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
+          outV.isNull[j] = false;
+        }
+      }
+      outV.isRepeating = false;
+    }
+  }
+
+  public int getMapColumnNum() {
+    return mapColumnNum;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
new file mode 100644
index 0000000..34f73a4
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Map lookup whose key is taken, row by row, from a column of the FLOAT_FAMILY.
+ * All evaluation logic lives in {@link VectorUDFMapIndexBaseCol}; this class only
+ * describes the argument types and reads keys from a {@link DoubleColumnVector}.
+ */
+public class VectorUDFMapIndexDoubleCol extends VectorUDFMapIndexBaseCol {
+
+  public VectorUDFMapIndexDoubleCol() {
+    super();
+  }
+
+  public VectorUDFMapIndexDoubleCol(int mapColumnNum, int indexColumnNum, int outputColumnNum) {
+    super(mapColumnNum, indexColumnNum, outputColumnNum);
+  }
+
+  /** Describes the map column and the key column for plan output. */
+  @Override
+  public String vectorExpressionParameters() {
+    String mapParam = getColumnParamString(0, getMapColumnNum());
+    String keyParam = getColumnParamString(1, getIndexColumnNum());
+    return mapParam + ", key: " + keyParam;
+  }
+
+  /** Declares a two-argument projection: (MAP column, FLOAT_FAMILY column). */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Descriptor descriptor =
+        new VectorExpressionDescriptor.Builder()
+            .setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+            .setNumArguments(2)
+            .setArgumentTypes(
+                VectorExpressionDescriptor.ArgumentType.MAP,
+                VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY)
+            .setInputExpressionTypes(
+                VectorExpressionDescriptor.InputExpressionType.COLUMN,
+                VectorExpressionDescriptor.InputExpressionType.COLUMN)
+            .build();
+    return descriptor;
+  }
+
+  /** Reads the key at {@code index} from a {@link DoubleColumnVector}. */
+  @Override
+  protected Object getKeyByIndex(ColumnVector cv, int index) {
+    DoubleColumnVector doubleKeys = (DoubleColumnVector) cv;
+    return doubleKeys.vector[index];
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java
new file mode 100644
index 0000000..1abcdbe
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Returns value of Map. 
+ * Extends {@link VectorUDFMapIndexBaseScalar}; the constant key is held as a
+ * {@link HiveDecimal} and compared with the map keys through its double value.
+ */
+public class VectorUDFMapIndexDoubleScalar extends VectorUDFMapIndexBaseScalar {
+
+  private HiveDecimal key;
+
+  public VectorUDFMapIndexDoubleScalar() {
+    super();
+  }
+
+  public VectorUDFMapIndexDoubleScalar(int mapColumnNum, HiveDecimal key, int outputColumnNum) {
+    super(mapColumnNum, outputColumnNum);
+    this.key = key;
+  }
+
+  /** Describes the map column and the constant key for plan output. */
+  @Override
+  public String vectorExpressionParameters() {
+    String mapParam = getColumnParamString(0, getMapColumnNum());
+    return mapParam + ", key: " + key;
+  }
+
+  /** Declares a two-argument projection: (MAP column, DECIMAL scalar). */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Descriptor descriptor =
+        new VectorExpressionDescriptor.Builder()
+            .setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+            .setNumArguments(2)
+            .setArgumentTypes(
+                VectorExpressionDescriptor.ArgumentType.MAP,
+                VectorExpressionDescriptor.ArgumentType.DECIMAL)
+            .setInputExpressionTypes(
+                VectorExpressionDescriptor.InputExpressionType.COLUMN,
+                VectorExpressionDescriptor.InputExpressionType.SCALAR)
+            .build();
+    return descriptor;
+  }
+
+  /** Reads the key at {@code index} from a {@link DoubleColumnVector}. */
+  @Override
+  protected Object getKeyByIndex(ColumnVector cv, int index) {
+    DoubleColumnVector doubleKeys = (DoubleColumnVector) cv;
+    return doubleKeys.vector[index];
+  }
+
+  /** The key is a constant, so the row number is ignored. */
+  @Override
+  public Object getCurrentKey(int index) {
+    return key;
+  }
+
+  /**
+   * {@code columnKey} is the {@link HiveDecimal} constant and {@code otherKey} the key read
+   * from the map; they match when the map key equals the constant's double value.
+   */
+  @Override
+  protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
+    HiveDecimal wanted = (HiveDecimal) columnKey;
+    return otherKey.equals(wanted.doubleValue());
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
new file mode 100644
index 0000000..f1c4a69
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Returns value of Map. 
+ * Extends {@link VectorUDFMapIndexBaseCol}; the key of each row is read from an
+ * INT_FAMILY column backed by a {@link LongColumnVector}.
+ */
+public class VectorUDFMapIndexLongCol extends VectorUDFMapIndexBaseCol {
+
+  public VectorUDFMapIndexLongCol() {
+    super();
+  }
+
+  public VectorUDFMapIndexLongCol(int mapColumnNum, int indexColumnNum, int outputColumnNum) {
+    super(mapColumnNum, indexColumnNum, outputColumnNum);
+  }
+
+  /** Describes the map column and the key column for plan output. */
+  @Override
+  public String vectorExpressionParameters() {
+    String mapParam = getColumnParamString(0, getMapColumnNum());
+    String keyParam = getColumnParamString(1, getIndexColumnNum());
+    return mapParam + ", key: " + keyParam;
+  }
+
+  /** Declares a two-argument projection: (MAP column, INT_FAMILY column). */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Descriptor descriptor =
+        new VectorExpressionDescriptor.Builder()
+            .setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+            .setNumArguments(2)
+            .setArgumentTypes(
+                VectorExpressionDescriptor.ArgumentType.MAP,
+                VectorExpressionDescriptor.ArgumentType.INT_FAMILY)
+            .setInputExpressionTypes(
+                VectorExpressionDescriptor.InputExpressionType.COLUMN,
+                VectorExpressionDescriptor.InputExpressionType.COLUMN)
+            .build();
+    return descriptor;
+  }
+
+  /** Reads the key at {@code index} from a {@link LongColumnVector}. */
+  @Override
+  protected Object getKeyByIndex(ColumnVector cv, int index) {
+    LongColumnVector longKeys = (LongColumnVector) cv;
+    return longKeys.vector[index];
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
new file mode 100644
index 0000000..eb66826
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Map lookup with a constant INT_FAMILY key.
+ * All evaluation logic lives in {@link VectorUDFMapIndexBaseScalar}; this class only
+ * describes the argument types, holds the key and reads map keys from a
+ * {@link LongColumnVector}.
+ */
+public class VectorUDFMapIndexLongScalar extends VectorUDFMapIndexBaseScalar {
+
+  private long key;
+
+  public VectorUDFMapIndexLongScalar() {
+    super();
+  }
+
+  public VectorUDFMapIndexLongScalar(int mapColumnNum, long key, int outputColumnNum) {
+    super(mapColumnNum, outputColumnNum);
+    this.key = key;
+  }
+
+  /** Describes the map column and the constant key for plan output. */
+  @Override
+  public String vectorExpressionParameters() {
+    String mapParam = getColumnParamString(0, getMapColumnNum());
+    return mapParam + ", key: " + key;
+  }
+
+  /** Declares a two-argument projection: (MAP column, INT_FAMILY scalar). */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Descriptor descriptor =
+        new VectorExpressionDescriptor.Builder()
+            .setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+            .setNumArguments(2)
+            .setArgumentTypes(
+                VectorExpressionDescriptor.ArgumentType.MAP,
+                VectorExpressionDescriptor.ArgumentType.INT_FAMILY)
+            .setInputExpressionTypes(
+                VectorExpressionDescriptor.InputExpressionType.COLUMN,
+                VectorExpressionDescriptor.InputExpressionType.SCALAR)
+            .build();
+    return descriptor;
+  }
+
+  /** Reads the key at {@code index} from a {@link LongColumnVector}. */
+  @Override
+  protected Object getKeyByIndex(ColumnVector cv, int index) {
+    LongColumnVector longKeys = (LongColumnVector) cv;
+    return longKeys.vector[index];
+  }
+
+  /** The key is a constant, so the row number is ignored. */
+  @Override
+  public Object getCurrentKey(int index) {
+    return key;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
new file mode 100644
index 0000000..576ebe9
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+import java.util.Arrays;
+
+/**
+ * Returns value of Map.
+ * Extends {@link VectorUDFMapIndexBaseCol}; the key of each row is read from a
+ * STRING_FAMILY column backed by a {@link BytesColumnVector} and compared byte-wise.
+ */
+public class VectorUDFMapIndexStringCol extends VectorUDFMapIndexBaseCol {
+
+  public VectorUDFMapIndexStringCol() {
+    super();
+  }
+
+  public VectorUDFMapIndexStringCol(int mapColumnNum, int indexColumnNum, int outputColumnNum) {
+    super(mapColumnNum, indexColumnNum, outputColumnNum);
+  }
+
+  /** Describes the map column and the key column for plan output. */
+  @Override
+  public String vectorExpressionParameters() {
+    String mapParam = getColumnParamString(0, getMapColumnNum());
+    String keyParam = getColumnParamString(1, getIndexColumnNum());
+    return mapParam + ", key: " + keyParam;
+  }
+
+  /** Declares a two-argument projection: (MAP column, STRING_FAMILY column). */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Descriptor descriptor =
+        new VectorExpressionDescriptor.Builder()
+            .setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+            .setNumArguments(2)
+            .setArgumentTypes(
+                VectorExpressionDescriptor.ArgumentType.MAP,
+                VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
+            .setInputExpressionTypes(
+                VectorExpressionDescriptor.InputExpressionType.COLUMN,
+                VectorExpressionDescriptor.InputExpressionType.COLUMN)
+            .build();
+    return descriptor;
+  }
+
+  /**
+   * Copies the bytes of the key at {@code index} out of the {@link BytesColumnVector}, using
+   * the entry's start and length, so that the key can be compared as a standalone array.
+   */
+  @Override
+  protected Object getKeyByIndex(ColumnVector cv, int index) {
+    BytesColumnVector keys = (BytesColumnVector) cv;
+    int begin = keys.start[index];
+    int end = keys.start[index] + keys.length[index];
+    return ArrayUtils.subarray(keys.vector[index], begin, end);
+  }
+
+  /** Keys are byte arrays, so they are compared by content rather than by reference. */
+  @Override
+  protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
+    byte[] wanted = (byte[]) columnKey;
+    byte[] candidate = (byte[]) otherKey;
+    return Arrays.equals(wanted, candidate);
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
new file mode 100644
index 0000000..7a87972
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +import java.util.Arrays; + +/** + * Returns value of Map. 
+ * Extends {@link VectorUDFMapIndexBaseScalar}; the constant key is held as raw bytes and
+ * compared byte-wise with the keys read from a {@link BytesColumnVector}.
+ */
+public class VectorUDFMapIndexStringScalar extends VectorUDFMapIndexBaseScalar {
+
+  private byte[] key;
+
+  public VectorUDFMapIndexStringScalar() {
+    super();
+  }
+
+  public VectorUDFMapIndexStringScalar(int mapColumnNum, byte[] key, int outputColumnNum) {
+    super(mapColumnNum, outputColumnNum);
+    this.key = key;
+  }
+
+  /**
+   * Describes the map column and the constant key for plan output.
+   *
+   * <p>The key bytes are decoded with an explicit charset so the text does not depend on the
+   * platform default (assumes the key bytes are UTF-8 - TODO confirm against the caller that
+   * builds the scalar). An instance created through the no-argument constructor has no key
+   * yet; that is rendered as "null" instead of failing with a NullPointerException.
+   */
+  @Override
+  public String vectorExpressionParameters() {
+    String keyText =
+        (key == null) ? "null" : new String(key, java.nio.charset.StandardCharsets.UTF_8);
+    return getColumnParamString(0, getMapColumnNum()) + ", key: " + keyText;
+  }
+
+  /** Declares a two-argument projection: (MAP column, STRING_FAMILY scalar). */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.MAP,
+            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+
+  /**
+   * Copies the bytes of the key at {@code index} out of the {@link BytesColumnVector}, using
+   * the entry's start and length, so that the key can be compared as a standalone array.
+   */
+  @Override
+  protected Object getKeyByIndex(ColumnVector cv, int index) {
+    BytesColumnVector bytesCV = (BytesColumnVector) cv;
+    return ArrayUtils.subarray(bytesCV.vector[index], bytesCV.start[index],
+        bytesCV.start[index] + bytesCV.length[index]);
+  }
+
+  /** The key is a constant, so the row number is ignored. */
+  @Override
+  public Object getCurrentKey(int index) {
+    return key;
+  }
+
+  /** Keys are byte arrays, so they are compared by content rather than by reference. */
+  @Override
+  protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
+    return Arrays.equals((byte[])columnKey, (byte[]) otherKey);
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
index 3db96ec..e2f61bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java @@ -25,6 +25,12 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.ListIndexColColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.ListIndexColScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDoubleCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexLongCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexStringCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexStringScalar; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; @@ -40,7 +46,10 @@ import org.apache.hadoop.io.IntWritable; * */ @Description(name = "index", value = "_FUNC_(a, n) - Returns the n-th element of a ") -@VectorizedExpressions({ListIndexColScalar.class, ListIndexColColumn.class}) +@VectorizedExpressions({ListIndexColScalar.class, ListIndexColColumn.class, + VectorUDFMapIndexStringScalar.class, VectorUDFMapIndexLongScalar.class, + VectorUDFMapIndexDoubleScalar.class, VectorUDFMapIndexStringCol.class, + VectorUDFMapIndexLongCol.class, VectorUDFMapIndexDoubleCol.class}) public class GenericUDFIndex extends GenericUDF { private transient MapObjectInspector mapOI; http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/test/queries/clientpositive/parquet_map_type_vectorization.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_map_type_vectorization.q 
b/ql/src/test/queries/clientpositive/parquet_map_type_vectorization.q new file mode 100644 index 0000000..1589545 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_map_type_vectorization.q @@ -0,0 +1,73 @@ +set hive.mapred.mode=nonstrict; +set hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +DROP TABLE parquet_map_type_staging; +DROP TABLE parquet_map_type; + +CREATE TABLE parquet_map_type_staging ( +id int, +stringMap map<string, string>, +intMap map<int, int>, +doubleMap map<double, double>, +stringIndex string, +intIndex int, +doubleIndex double +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':'; + +CREATE TABLE parquet_map_type ( +id int, +stringMap map<string, string>, +intMap map<int, int>, +doubleMap map<double, double>, +stringIndex string, +intIndex int, +doubleIndex double +) STORED AS PARQUET; + +-- test data size < 1024 +LOAD DATA LOCAL INPATH '../../data/files/parquet_vector_map_type.txt' OVERWRITE INTO TABLE parquet_map_type_staging; +INSERT OVERWRITE TABLE parquet_map_type +SELECT id, stringMap, intMap, doubleMap, stringIndex, intIndex, doubleIndex FROM parquet_map_type_staging where id < 1024; + +-- verify the row number +select count(*) from parquet_map_type; +-- test element select with constant and variable +explain vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], +doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10; +select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], +stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10; +-- test complex select with map +explain vectorization expression select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by 
stringMap['k1'] limit 10; +select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10; + +-- test data size = 1024 +INSERT OVERWRITE TABLE parquet_map_type +SELECT id, stringMap, intMap, doubleMap, stringIndex, intIndex, doubleIndex FROM parquet_map_type_staging where id < 1025; + +-- verify the row number +select count(*) from parquet_map_type; +-- test element select with constant and variable +select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], +stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10; +-- test complex select with map +select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10; + +-- test data size = 1025 +INSERT OVERWRITE TABLE parquet_map_type +SELECT id, stringMap, intMap, doubleMap, stringIndex, intIndex, doubleIndex FROM parquet_map_type_staging; + +-- verify the row number +select count(*) from parquet_map_type; +-- test element select with constant and variable +select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], +stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10; +-- test complex select with map +select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10; http://git-wip-us.apache.org/repos/asf/hive/blob/a6fab143/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out 
b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out new file mode 100644 index 0000000..ae7db3f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ -0,0 +1,500 @@ +PREHOOK: query: DROP TABLE parquet_map_type_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_map_type_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE parquet_map_type +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_map_type +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_map_type_staging ( +id int, +stringMap map<string, string>, +intMap map<int, int>, +doubleMap map<double, double>, +stringIndex string, +intIndex int, +doubleIndex double +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_map_type_staging +POSTHOOK: query: CREATE TABLE parquet_map_type_staging ( +id int, +stringMap map<string, string>, +intMap map<int, int>, +doubleMap map<double, double>, +stringIndex string, +intIndex int, +doubleIndex double +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_map_type_staging +PREHOOK: query: CREATE TABLE parquet_map_type ( +id int, +stringMap map<string, string>, +intMap map<int, int>, +doubleMap map<double, double>, +stringIndex string, +intIndex int, +doubleIndex double +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_map_type +POSTHOOK: query: CREATE TABLE parquet_map_type ( +id int, +stringMap map<string, string>, +intMap map<int, int>, +doubleMap map<double, double>, +stringIndex string, +intIndex int, +doubleIndex double +) STORED AS PARQUET +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_map_type +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_vector_map_type.txt' OVERWRITE INTO TABLE parquet_map_type_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_map_type_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_vector_map_type.txt' OVERWRITE INTO TABLE parquet_map_type_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_map_type_staging +PREHOOK: query: INSERT OVERWRITE TABLE parquet_map_type +SELECT id, stringMap, intMap, doubleMap, stringIndex, intIndex, doubleIndex FROM parquet_map_type_staging where id < 1024 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type_staging +PREHOOK: Output: default@parquet_map_type +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_map_type +SELECT id, stringMap, intMap, doubleMap, stringIndex, intIndex, doubleIndex FROM parquet_map_type_staging where id < 1024 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type_staging +POSTHOOK: Output: default@parquet_map_type +POSTHOOK: Lineage: parquet_map_type.doubleindex SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:doubleindex, type:double, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.doublemap SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:doublemap, type:map<double,double>, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.id SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.intindex SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:intindex, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.intmap SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:intmap, type:map<int,int>, comment:null), ] +POSTHOOK: 
Lineage: parquet_map_type.stringindex SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:stringindex, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:stringmap, type:map<string,string>, comment:null), ] +PREHOOK: query: select count(*) from parquet_map_type +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from parquet_map_type +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +1023 +PREHOOK: query: explain vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], +doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], +doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_map_type + Statistics: Num rows: 1023 Data size: 2183412 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: stringmap (type: map<string,string>), intmap (type: map<int,int>), doublemap (type: map<double,double>), stringmap['k2'] (type: string), intmap[456] (type: int), doublemap[123.123] (type: double), stringmap[stringindex] (type: string), intmap[intindex] (type: int), doublemap[doubleindex] (type: double) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2, 3, 8, 9, 10, 11, 12, 13] + selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k2) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 456) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map<double,double>, key: 123.123) -> 10:double, VectorUDFMapIndexStringCol(col 1:map<string,string>, key: col 4:string) -> 11:string, VectorUDFMapIndexLongCol(col 2:map<int,int>, key: col 5:int) -> 12:int, VectorUDFMapIndexDoubleCol(col 3:map<double,double>, key: col 6:double) -> 13:double + Statistics: Num rows: 1023 Data size: 2183412 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 21340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 21340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], +stringMap[stringIndex], 
intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], +stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +{"k1":"v1","k2":"v1-2"} {456:2,123:1} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 +{"k1":"v2","k2":"v2-2"} {456:4,123:3} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 +{"k1":"v3","k2":"v3-2"} {456:6,123:5} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 +{"k1":"v4","k2":"v4-2"} {456:8,123:7} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 +{"k1":"v5","k2":"v5-2"} {456:10,123:9} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 +{"k1":"v6","k2":"v6-2"} {456:12,123:11} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 +{"k1":"v7","k2":"v7-2"} {456:14,123:13} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 +{"k1":"v8","k2":"v8-2"} {456:16,123:15} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 +{"k1":"v9","k2":"v9-2"} {456:18,123:17} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 +{"k1":"v10","k2":"v10-2"} {456:20,123:19} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 +PREHOOK: query: explain vectorization expression select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_map_type + Statistics: Num rows: 1023 Data size: 1992704 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringColLikeStringScalar(col 8:string, pattern v100%)(children: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k1) -> 8:string) + predicate: (stringmap['k1'] like 'v100%') (type: boolean) + Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: stringmap['k1'] (type: string), intmap[123] (type: int), doublemap[123.123] (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 9, 10] + selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 123) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map<double,double>, key: 123.123) -> 10:double + Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col2) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 9:int) -> bigint, VectorUDAFSumDouble(col 10:double) -> double + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 8:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint), _col2 (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 255 Data size: 496715 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col2 
(type: double), _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2, 0] + Statistics: Num rows: 255 Data size: 496715 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 255 Data size: 496715 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: bigint), _col1 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: double), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2, 0] + Statistics: Num rows: 255 Data size: 496715 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 19470 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 19470 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +199 100.1 v100 +1999 1000.1 v1000 +2001 1001.1 v1001 +2003 1002.1 v1002 +2005 1003.1 v1003 +2007 1004.1 v1004 +2009 1005.1 v1005 +2011 1006.1 v1006 +2013 1007.1 v1007 +2015 1008.1 v1008 +PREHOOK: query: INSERT OVERWRITE TABLE parquet_map_type +SELECT id, stringMap, intMap, doubleMap, stringIndex, intIndex, doubleIndex FROM parquet_map_type_staging where id < 1025 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type_staging +PREHOOK: Output: default@parquet_map_type +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_map_type +SELECT id, stringMap, intMap, doubleMap, stringIndex, intIndex, doubleIndex FROM parquet_map_type_staging where id < 1025 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type_staging +POSTHOOK: Output: default@parquet_map_type +POSTHOOK: Lineage: parquet_map_type.doubleindex SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:doubleindex, type:double, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.doublemap SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:doublemap, type:map<double,double>, comment:null), ] 
+POSTHOOK: Lineage: parquet_map_type.id SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.intindex SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:intindex, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.intmap SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:intmap, type:map<int,int>, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.stringindex SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:stringindex, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:stringmap, type:map<string,string>, comment:null), ] +PREHOOK: query: select count(*) from parquet_map_type +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from parquet_map_type +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +1024 +PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], +stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], +stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +{"k1":"v1","k2":"v1-2"} {456:2,123:1} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 +{"k1":"v2","k2":"v2-2"} {456:4,123:3} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 +{"k1":"v3","k2":"v3-2"} {456:6,123:5} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 
+{"k1":"v4","k2":"v4-2"} {456:8,123:7} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 +{"k1":"v5","k2":"v5-2"} {456:10,123:9} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 +{"k1":"v6","k2":"v6-2"} {456:12,123:11} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 +{"k1":"v7","k2":"v7-2"} {456:14,123:13} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 +{"k1":"v8","k2":"v8-2"} {456:16,123:15} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 +{"k1":"v9","k2":"v9-2"} {456:18,123:17} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 +{"k1":"v10","k2":"v10-2"} {456:20,123:19} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 +PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +199 100.1 v100 +1999 1000.1 v1000 +2001 1001.1 v1001 +2003 1002.1 v1002 +2005 1003.1 v1003 +2007 1004.1 v1004 +2009 1005.1 v1005 +2011 1006.1 v1006 +2013 1007.1 v1007 +2015 1008.1 v1008 +PREHOOK: query: INSERT OVERWRITE TABLE parquet_map_type +SELECT id, stringMap, intMap, doubleMap, stringIndex, intIndex, doubleIndex FROM parquet_map_type_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type_staging +PREHOOK: Output: default@parquet_map_type +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_map_type +SELECT id, stringMap, intMap, doubleMap, stringIndex, intIndex, doubleIndex FROM parquet_map_type_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type_staging +POSTHOOK: Output: default@parquet_map_type +POSTHOOK: Lineage: 
parquet_map_type.doubleindex SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:doubleindex, type:double, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.doublemap SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:doublemap, type:map<double,double>, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.id SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.intindex SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:intindex, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.intmap SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:intmap, type:map<int,int>, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.stringindex SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:stringindex, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging)parquet_map_type_staging.FieldSchema(name:stringmap, type:map<string,string>, comment:null), ] +PREHOOK: query: select count(*) from parquet_map_type +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from parquet_map_type +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +1025 +PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], +stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], +stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@parquet_map_type +#### A masked pattern was here #### +{"k1":"v1","k2":"v1-2"} {456:2,123:1} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 +{"k1":"v2","k2":"v2-2"} {456:4,123:3} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 +{"k1":"v3","k2":"v3-2"} {456:6,123:5} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 +{"k1":"v4","k2":"v4-2"} {456:8,123:7} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 +{"k1":"v5","k2":"v5-2"} {456:10,123:9} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 +{"k1":"v6","k2":"v6-2"} {456:12,123:11} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 +{"k1":"v7","k2":"v7-2"} {456:14,123:13} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 +{"k1":"v8","k2":"v8-2"} {456:16,123:15} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 +{"k1":"v9","k2":"v9-2"} {456:18,123:17} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 +{"k1":"v10","k2":"v10-2"} {456:20,123:19} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 +PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] +from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_type +#### A masked pattern was here #### +199 100.1 v100 +1999 1000.1 v1000 +2001 1001.1 v1001 +2003 1002.1 v1002 +2005 1003.1 v1003 +2007 1004.1 v1004 +2009 1005.1 v1005 +2011 1006.1 v1006 +2013 1007.1 v1007 +2015 1008.1 v1008