improve integer type upgrade logic
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/1429da5f Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/1429da5f Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/1429da5f Branch: refs/heads/KYLIN-1971 Commit: 1429da5f650852402d1494785368e9175f13c379 Parents: 615e21d Author: Hongbin Ma <mahong...@apache.org> Authored: Fri Sep 30 18:31:04 2016 +0800 Committer: Hongbin Ma <mahong...@apache.org> Committed: Thu Oct 27 08:30:13 2016 +0800 ---------------------------------------------------------------------- .../filter/EvaluatableFunctionTupleFilter.java | 151 ------------------- .../metadata/filter/TupleFilterSerializer.java | 20 ++- .../apache/kylin/metadata/model/ColumnDesc.java | 20 ++- .../apache/kylin/metadata/tuple/TupleInfo.java | 4 +- .../apache/kylin/query/schema/OLAPTable.java | 31 +++- 5 files changed, 63 insertions(+), 163 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/1429da5f/core-metadata/src/main/java/org/apache/kylin/metadata/filter/EvaluatableFunctionTupleFilter.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/EvaluatableFunctionTupleFilter.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/EvaluatableFunctionTupleFilter.java deleted file mode 100644 index ff24172..0000000 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/EvaluatableFunctionTupleFilter.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.metadata.filter; - -import java.lang.reflect.InvocationTargetException; -import java.nio.ByteBuffer; -import java.util.Collection; -import java.util.List; - -import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.BytesUtil; -import org.apache.kylin.metadata.datatype.DataType; -import org.apache.kylin.metadata.datatype.StringSerializer; -import org.apache.kylin.metadata.model.TblColRef; -import org.apache.kylin.metadata.tuple.IEvaluatableTuple; - -import com.google.common.collect.Lists; - -public class EvaluatableFunctionTupleFilter extends BuiltInFunctionTupleFilter { - - private boolean constantsInitted = false; - - //about non-like - private List<Object> values; - private Object tupleValue; - - public EvaluatableFunctionTupleFilter(String name) { - super(name, FilterOperatorEnum.EVAL_FUNC); - values = Lists.newArrayListWithCapacity(1); - values.add(null); - } - - @Override - public boolean evaluate(IEvaluatableTuple tuple, IFilterCodeSystem cs) { - - // extract tuple value - Object tupleValue = null; - for (TupleFilter filter : this.children) { - if (!isConstant(filter)) { - filter.evaluate(tuple, cs); - tupleValue = filter.getValues().iterator().next(); - } - } - - TblColRef tblColRef = this.getColumn(); - DataType strDataType = DataType.getType("string"); - if (tblColRef.getType() != strDataType) { - throw new IllegalStateException("Only String type is allow in BuiltInFunction"); - } - ByteArray valueByteArray = (ByteArray) tupleValue; - StringSerializer serializer = new StringSerializer(strDataType); - String value = serializer.deserialize(ByteBuffer.wrap(valueByteArray.array(), valueByteArray.offset(), valueByteArray.length())); - - try { - if (isLikeFunction()) { - return (Boolean) invokeFunction(value); - } else { - this.tupleValue = invokeFunction(value); - //convert back to ByteArray format because the outer EvaluatableFunctionTupleFilter assumes input as ByteArray - ByteBuffer buffer = ByteBuffer.allocate(valueByteArray.length() * 2); - serializer.serialize((String) this.tupleValue, buffer); - this.tupleValue = new ByteArray(buffer.array(), 0, buffer.position()); - - return true; - } - } catch (InvocationTargetException | IllegalAccessException e) { - throw new RuntimeException(e); - } - } - - @Override - public Collection<?> getValues() { - this.values.set(0, tupleValue); - return values; - } - - @Override - public void serialize(IFilterCodeSystem<?> cs, ByteBuffer buffer) { - if (!isValid()) { - throw new IllegalStateException("must be valid"); - } - BytesUtil.writeUTFString(name, buffer); - } - - @Override - public void deserialize(IFilterCodeSystem<?> cs, ByteBuffer buffer) { - this.name = BytesUtil.readUTFString(buffer); - this.initMethod(); - } - - @Override - public boolean isEvaluable() { - return true; - } - - private boolean isConstant(TupleFilter filter) { - return (filter instanceof ConstantTupleFilter) || (filter instanceof DynamicTupleFilter); - } - - @Override - public Object invokeFunction(Object input) throws InvocationTargetException, IllegalAccessException { - if (isLikeFunction()) - initConstants(); - return super.invokeFunction(input); - } - - private void initConstants() { - if (constantsInitted) { - return; - } - //will replace the ByteArray pattern to String type - ByteArray byteArray = (ByteArray) methodParams.get(constantPosition); - StringSerializer s = new StringSerializer(DataType.getType("string")); - String pattern = s.deserialize(ByteBuffer.wrap(byteArray.array(), byteArray.offset(), byteArray.length())); - //TODO - //pattern = pattern.toLowerCase();//to remove upper case - methodParams.set(constantPosition, pattern); - constantsInitted = true; - } - - //even for "tolower(s)/toupper(s)/substring(like) like pattern", the like pattern can be used for index searching - public String getLikePattern() { - if (!isLikeFunction()) { - return null; - } - - initConstants(); - return (String) methodParams.get(1); - } - - public boolean isLikeFunction() { - return "like".equalsIgnoreCase(this.getName()); - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/1429da5f/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilterSerializer.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilterSerializer.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilterSerializer.java index a051ea9..2df474e 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilterSerializer.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilterSerializer.java @@ -18,6 +18,7 @@ package org.apache.kylin.metadata.filter; +import java.lang.reflect.InvocationTargetException; import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.util.HashMap; @@ -29,6 +30,8 @@ import org.apache.kylin.metadata.filter.UDF.MassInTupleFilter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.Maps; + /** * http://eli.thegreenplace.net/2011/09/29/an-interesting-tree-serialization-algorithm-from-dwarf * @@ -45,6 +48,7 @@ public class TupleFilterSerializer { private static final int BUFFER_SIZE = 65536; private static final Map<Integer, TupleFilter.FilterOperatorEnum> ID_OP_MAP = new HashMap<Integer, TupleFilter.FilterOperatorEnum>(); + protected static final Map<TupleFilter.FilterOperatorEnum, Class> extendedTupleFilters = Maps.newHashMap(); static { for (TupleFilter.FilterOperatorEnum op : TupleFilter.FilterOperatorEnum.values()) { @@ -191,14 +195,20 @@ public class TupleFilterSerializer { case UNSUPPORTED: filter = new UnsupportedTupleFilter(op); break; - case EVAL_FUNC: - filter = new EvaluatableFunctionTupleFilter(null); - break; case MASSIN: filter = new MassInTupleFilter(); break; - default: - throw new IllegalStateException("Error FilterOperatorEnum: " + op.getValue()); + default: { + if (extendedTupleFilters.containsKey(op)) { + try { + filter = (TupleFilter) extendedTupleFilters.get(op).getConstructor().newInstance(); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } else { + throw new IllegalStateException("Error FilterOperatorEnum: " + op.getValue()); + } + } } return filter; http://git-wip-us.apache.org/repos/asf/kylin/blob/1429da5f/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java index 3bf0de9..a13bd37 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java @@ -26,6 +26,7 @@ import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; /** * Column Metadata from Source. All name should be uppercase. @@ -47,6 +48,7 @@ public class ColumnDesc implements Serializable { // parsed from data type private DataType type; + private DataType upgradedType; private TableDesc table; private int zeroBasedIndex = -1; @@ -80,6 +82,18 @@ public class ColumnDesc implements Serializable { type = DataType.getType(datatype); } + public void setUpgradedType(String datatype) { + this.upgradedType = DataType.getType(datatype); + } + + public DataType getUpgradedType() { + if (this.upgradedType == null) { + return this.type; + } else { + return this.upgradedType; + } + } + public String getId() { return id; } @@ -171,19 +185,19 @@ public class ColumnDesc implements Serializable { if (getClass() != obj.getClass()) return false; ColumnDesc other = (ColumnDesc) obj; - + if (name == null) { if (other.name != null) return false; } else if (!name.equals(other.name)) return false; - + if (datatype == null) { if (other.datatype != null) return false; } else if (!datatype.equals(other.datatype)) return false; - + return true; } http://git-wip-us.apache.org/repos/asf/kylin/blob/1429da5f/core-metadata/src/main/java/org/apache/kylin/metadata/tuple/TupleInfo.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/tuple/TupleInfo.java b/core-metadata/src/main/java/org/apache/kylin/metadata/tuple/TupleInfo.java index f7c3b57..8970124 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/tuple/TupleInfo.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/tuple/TupleInfo.java @@ -94,9 +94,9 @@ public class TupleInfo { columns.add(index, col); if (dataTypeNames.size() > index) - dataTypeNames.set(index, col.getType().getName()); + dataTypeNames.set(index, col.getColumnDesc().getUpgradedType().getName()); else - dataTypeNames.add(index, col.getType().getName()); + dataTypeNames.add(index, col.getColumnDesc().getUpgradedType().getName()); } public List<String> getAllFields() { http://git-wip-us.apache.org/repos/asf/kylin/blob/1429da5f/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java ---------------------------------------------------------------------- diff --git a/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java b/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java index 4994433..ac70716 100644 --- a/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java +++ b/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java @@ -43,15 +43,21 @@ import org.apache.calcite.schema.impl.AbstractTableQueryable; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.metadata.model.ColumnDesc; +import org.apache.kylin.metadata.model.DataModelDesc; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.MeasureDesc; import org.apache.kylin.metadata.model.TableDesc; import org.apache.kylin.metadata.project.ProjectManager; +import org.apache.kylin.metadata.realization.IRealization; +import org.apache.kylin.metadata.realization.RealizationType; import org.apache.kylin.query.enumerator.OLAPQuery; import org.apache.kylin.query.enumerator.OLAPQuery.EnumeratorTypeEnum; import org.apache.kylin.query.relnode.OLAPTableScan; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -61,6 +67,8 @@ import com.google.common.collect.Sets; */ public class OLAPTable extends AbstractQueryableTable implements TranslatableTable { + protected static final Logger logger = LoggerFactory.getLogger(OLAPTable.class); + private static Map<String, SqlTypeName> SQLTYPE_MAPPING = new HashMap<String, SqlTypeName>(); static { @@ -123,7 +131,7 @@ public class OLAPTable extends AbstractQueryableTable implements TranslatableTab private RelDataType deriveRowType(RelDataTypeFactory typeFactory) { RelDataTypeFactory.FieldInfoBuilder fieldInfo = typeFactory.builder(); for (ColumnDesc column : exposedColumns) { - RelDataType sqlType = createSqlType(typeFactory, column.getType(), column.isNullable()); + RelDataType sqlType = createSqlType(typeFactory, column.getUpgradedType(), column.isNullable()); sqlType = SqlTypeUtil.addCharsetAndCollation(sqlType, typeFactory); fieldInfo.add(column.getName(), sqlType); } @@ -176,6 +184,8 @@ public class OLAPTable extends AbstractQueryableTable implements TranslatableTab //if exist sum(x), where x is integer/short/byte //to avoid overflow we upgrade x's type to long + //this includes checking two parts: + //1. sum measures in cubes: HashSet<ColumnDesc> updateColumns = Sets.newHashSet(); for (MeasureDesc m : mgr.listEffectiveMeasures(olapSchema.getProjectName(), sourceTable.getIdentity())) { if (m.getFunction().isSum()) { @@ -187,9 +197,26 @@ public class OLAPTable extends AbstractQueryableTable implements TranslatableTab } } } + //2. All integer measures in non-cube realizations + MetadataManager metadataManager = MetadataManager.getInstance(olapSchema.getConfig()); + for (IRealization realization : mgr.listAllRealizations(olapSchema.getProjectName())) { + if (realization.getType() == RealizationType.INVERTED_INDEX && sourceTable.getIdentity().equalsIgnoreCase(realization.getFactTable())) { + DataModelDesc dataModelDesc = realization.getDataModelDesc(); + for (String metricColumn : dataModelDesc.getMetrics()) { + ColumnDesc columnDesc = metadataManager.getColumnDesc(dataModelDesc.getFactTable() + "." + metricColumn); + if (columnDesc.getType().isIntegerFamily() && !columnDesc.getType().isBigInt()) + updateColumns.add(columnDesc); + } + } + } + for (ColumnDesc upgrade : updateColumns) { int index = tableColumns.indexOf(upgrade); - tableColumns.get(index).setDatatype("bigint"); + if (index < 0) { + throw new IllegalStateException("Metric column " + upgrade + " is not found in the the project's columns"); + } + tableColumns.get(index).setUpgradedType("bigint"); + logger.info("To avoid overflow, upgraded {}'s type from {} to {}", tableColumns.get(index), tableColumns.get(index).getType(), tableColumns.get(index).getUpgradedType()); } return Lists.newArrayList(Iterables.concat(tableColumns, metricColumns));