Repository: kylin Updated Branches: refs/heads/2.x-staging 3c2329dc1 -> 82c6d588f
KYLIN-1313 Enable deriving dimensions on non PK/FK Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/82c6d588 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/82c6d588 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/82c6d588 Branch: refs/heads/2.x-staging Commit: 82c6d588fefa8851c49a2b3a27c522ee72521682 Parents: 3c2329d Author: honma <ho...@ebay.com> Authored: Mon Feb 1 14:03:43 2016 +0800 Committer: honma <ho...@ebay.com> Committed: Mon Feb 1 18:46:14 2016 +0800 ---------------------------------------------------------------------- .../org/apache/kylin/cube/model/CubeDesc.java | 81 +++- .../cube/model/CubeJoinedFlatTableDesc.java | 6 +- .../apache/kylin/cube/model/DimensionDesc.java | 7 +- .../apache/kylin/cube/model/v3/CubeDesc.java | 18 +- .../org/apache/kylin/cube/util/CubingUtils.java | 4 +- .../org/apache/kylin/measure/MeasureType.java | 2 +- .../kylin/measure/MeasureTypeFactory.java | 2 + .../ExtendedColumnMeasureType.java | 269 ++++++++++++ .../ExtendedColumnSerializer.java | 75 ++++ .../kylin/measure/topn/TopNMeasureType.java | 2 +- .../kylin/metadata/model/DataModelDesc.java | 10 +- .../kylin/metadata/model/FunctionDesc.java | 19 +- .../ExtendedColumnSerializerTest.java | 82 ++++ .../test_kylin_cube_without_slr_desc.json | 421 ++++++++++++------- .../test_case_data/sandbox/kylin.properties | 2 +- .../kylin/provision/BuildCubeWithEngine.java | 3 +- .../apache/kylin/query/ITKylinQueryTest.java | 2 +- .../hbase/cube/v1/CubeSegmentTupleIterator.java | 2 +- .../storage/hbase/cube/v1/CubeStorageQuery.java | 19 +- .../hbase/cube/v1/CubeTupleConverter.java | 4 +- .../hbase/cube/v2/CubeSegmentScanner.java | 2 +- .../storage/hbase/cube/v2/CubeStorageQuery.java | 15 +- .../cube/v2/SequentialCubeTupleIterator.java | 2 +- 23 files changed, 839 insertions(+), 210 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java index 66a4b8a..2dc04d4 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java @@ -44,6 +44,7 @@ import org.apache.kylin.common.util.Array; import org.apache.kylin.common.util.CaseInsensitiveStringMap; import org.apache.kylin.common.util.JsonUtil; import org.apache.kylin.measure.MeasureType; +import org.apache.kylin.measure.extendedcolumn.ExtendedColumnMeasureType; import org.apache.kylin.metadata.MetadataConstants; import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.model.ColumnDesc; @@ -71,8 +72,14 @@ import com.google.common.collect.Maps; @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) public class CubeDesc extends RootPersistentEntity { + public static class CannotFilterExtendedColumnException extends RuntimeException { + public CannotFilterExtendedColumnException(TblColRef tblColRef) { + super(tblColRef == null ? "null" : tblColRef.getCanonicalName()); + } + } + public enum DeriveType { - LOOKUP, PK_FK + LOOKUP, PK_FK, EXTENDED_COLUMN } public static class DeriveInfo { @@ -146,6 +153,8 @@ public class CubeDesc extends RootPersistentEntity { private Map<TblColRef, DeriveInfo> derivedToHostMap = Maps.newHashMap(); private Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedMap = Maps.newHashMap(); + private Map<TblColRef, DeriveInfo> extendedColumnToHosts = Maps.newHashMap(); + public boolean isEnableSharding() { //in the future may extend to other storage that is shard-able return storageType == IStorageAware.ID_SHARDED_HBASE; @@ -171,13 +180,20 @@ public class CubeDesc extends RootPersistentEntity { } /** - * @return dimension columns excluding derived and measures + * @return dimension columns excluding derived */ - public List<TblColRef> listDimensionColumnsExcludingDerived() { + public List<TblColRef> listDimensionColumnsExcludingDerived(boolean alsoExcludeExtendedCol) { List<TblColRef> result = new ArrayList<TblColRef>(); for (TblColRef col : dimensionColumns) { - if (isDerived(col) == false) - result.add(col); + if (isDerived(col)) { + continue; + } + + if (alsoExcludeExtendedCol && isExtendedColumn(col)) { + continue; + } + + result.add(col); } return result; } @@ -209,12 +225,25 @@ public class CubeDesc extends RootPersistentEntity { return null; } + public boolean hasHostColumn(TblColRef col) { + return isDerived(col) || isExtendedColumn(col); + } + public boolean isDerived(TblColRef col) { return derivedToHostMap.containsKey(col); } + public boolean isExtendedColumn(TblColRef col) { + return extendedColumnToHosts.containsKey(col); + } + public DeriveInfo getHostInfo(TblColRef derived) { - return derivedToHostMap.get(derived); + if (isDerived(derived)) { + return derivedToHostMap.get(derived); + } else if (isExtendedColumn(derived)) { + return extendedColumnToHosts.get(derived); + } + throw new RuntimeException("Cannot get host info for " + derived); } public Map<Array<TblColRef>, List<DeriveInfo>> getHostToDerivedInfo(List<TblColRef> rowCols, Collection<TblColRef> wantedCols) { @@ -298,6 +327,10 @@ public class CubeDesc extends RootPersistentEntity { return model.getFactTableDesc(); } + public List<TableDesc> getLookupTableDescs() { + return model.getLookupTableDescs(); + } + public String[] getNullStrings() { return nullStrings; } @@ -432,7 +465,7 @@ public class CubeDesc extends RootPersistentEntity { public Map<String, TblColRef> buildColumnNameAbbreviation() { Map<String, TblColRef> r = new CaseInsensitiveStringMap<TblColRef>(); - for (TblColRef col : listDimensionColumnsExcludingDerived()) { + for (TblColRef col : listDimensionColumnsExcludingDerived(true)) { r.put(col.getName(), col); } return r; @@ -471,7 +504,7 @@ public class CubeDesc extends RootPersistentEntity { initMeasureReferenceToColumnFamily(); // check all dimension columns are presented on rowkey - List<TblColRef> dimCols = listDimensionColumnsExcludingDerived(); + List<TblColRef> dimCols = listDimensionColumnsExcludingDerived(true); if (rowkey.getRowKeyColumns().length != dimCols.size()) { addError("RowKey columns count (" + rowkey.getRowKeyColumns().length + ") does not match dimension columns count (" + dimCols.size() + "). "); } @@ -532,7 +565,7 @@ public class CubeDesc extends RootPersistentEntity { int find = ArrayUtils.indexOf(dimColArray, fk[i]); if (find >= 0) { TblColRef derivedCol = initDimensionColRef(pk[i]); - initDerivedMap(dimColArray[find], DeriveType.PK_FK, dim, derivedCol); + initDerivedMap(new TblColRef[] { dimColArray[find] }, DeriveType.PK_FK, dim, new TblColRef[] { derivedCol }, null); } } /** disable this code as we don't need fk be derived from pk @@ -565,10 +598,6 @@ public class CubeDesc extends RootPersistentEntity { return new String[][] { cols, extra }; } - private void initDerivedMap(TblColRef hostCol, DeriveType type, DimensionDesc dimension, TblColRef derivedCol) { - initDerivedMap(new TblColRef[] { hostCol }, type, dimension, new TblColRef[] { derivedCol }, null); - } - private void initDerivedMap(TblColRef[] hostCols, DeriveType type, DimensionDesc dimension, TblColRef[] derivedCols, String[] extra) { if (hostCols.length == 0 || derivedCols.length == 0) throw new IllegalStateException("host/derived columns must not be empty"); @@ -584,17 +613,20 @@ public class CubeDesc extends RootPersistentEntity { } } + Map<TblColRef, DeriveInfo> toHostMap = derivedToHostMap; + Map<Array<TblColRef>, List<DeriveInfo>> hostToMap = hostToDerivedMap; + Array<TblColRef> hostColArray = new Array<TblColRef>(hostCols); - List<DeriveInfo> infoList = hostToDerivedMap.get(hostColArray); + List<DeriveInfo> infoList = hostToMap.get(hostColArray); if (infoList == null) { - hostToDerivedMap.put(hostColArray, infoList = new ArrayList<DeriveInfo>()); + hostToMap.put(hostColArray, infoList = new ArrayList<DeriveInfo>()); } infoList.add(new DeriveInfo(type, dimension, derivedCols, false)); for (int i = 0; i < derivedCols.length; i++) { TblColRef derivedCol = derivedCols[i]; boolean isOneToOne = type == DeriveType.PK_FK || ArrayUtils.contains(hostCols, derivedCol) || (extra != null && extra[i].contains("1-1")); - derivedToHostMap.put(derivedCol, new DeriveInfo(type, dimension, hostCols, isOneToOne)); + toHostMap.put(derivedCol, new DeriveInfo(type, dimension, hostCols, isOneToOne)); } } @@ -640,6 +672,7 @@ public class CubeDesc extends RootPersistentEntity { } TableDesc factTable = getFactTableDesc(); + List<TableDesc> lookupTables = getLookupTableDescs(); for (MeasureDesc m : measures) { m.setName(m.getName().toUpperCase()); @@ -648,11 +681,23 @@ public class CubeDesc extends RootPersistentEntity { } FunctionDesc func = m.getFunction(); - func.init(factTable); + func.init(factTable,lookupTables); allColumns.addAll(func.getParameter().getColRefs()); + + if (ExtendedColumnMeasureType.FUNC_RAW.equalsIgnoreCase(m.getFunction().getExpression())) { + FunctionDesc functionDesc = m.getFunction(); + + List<TblColRef> hosts = ExtendedColumnMeasureType.getExtendedColumnHosts(functionDesc); + TblColRef extendedColumn = ExtendedColumnMeasureType.getExtendedColumn(functionDesc); + initExtendedColumnMap(hosts.toArray(new TblColRef[hosts.size()]), extendedColumn); + } } } + private void initExtendedColumnMap(TblColRef[] hostCols, TblColRef extendedColumn) { + extendedColumnToHosts.put(extendedColumn, new DeriveInfo(DeriveType.EXTENDED_COLUMN, null, hostCols, false)); + } + private void initMeasureReferenceToColumnFamily() { if (measures == null || measures.size() == 0) return; @@ -773,7 +818,7 @@ public class CubeDesc extends RootPersistentEntity { public void setPartitionDateEnd(long partitionDateEnd) { this.partitionDateEnd = partitionDateEnd; } - + public List<TblColRef> getAllColumnsNeedDictionary() { List<TblColRef> result = Lists.newArrayList(); http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-cube/src/main/java/org/apache/kylin/cube/model/CubeJoinedFlatTableDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeJoinedFlatTableDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeJoinedFlatTableDesc.java index adaf542..b773b3f 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeJoinedFlatTableDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeJoinedFlatTableDesc.java @@ -59,7 +59,7 @@ public class CubeJoinedFlatTableDesc implements IJoinedFlatTableDesc { } int columnIndex = 0; - for (TblColRef col : cubeDesc.listDimensionColumnsExcludingDerived()) { + for (TblColRef col : cubeDesc.listDimensionColumnsExcludingDerived(false)) { columnIndexMap.put(colName(col.getCanonicalName()), columnIndex); columnList.add(new IntermediateColumnDesc(String.valueOf(columnIndex), col)); columnIndex++; @@ -76,7 +76,7 @@ public class CubeJoinedFlatTableDesc implements IJoinedFlatTableDesc { } rowKeyColumnIndexes[i] = dimIdx; } - + List<MeasureDesc> measures = cubeDesc.getMeasures(); int measureSize = measures.size(); measureColumnIndexes = new int[measureSize][]; @@ -163,7 +163,7 @@ public class CubeJoinedFlatTableDesc implements IJoinedFlatTableDesc { Integer index = columnIndexMap.get(key); if (index == null) throw new IllegalArgumentException("Column " + colRef.toString() + " wasn't found on flat table."); - + return index.intValue(); } http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-cube/src/main/java/org/apache/kylin/cube/model/DimensionDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/DimensionDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/DimensionDesc.java index bccae58..3a348a3 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/DimensionDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/DimensionDesc.java @@ -93,16 +93,16 @@ public class DimensionDesc { // h.setColumn(h.getColumn().toUpperCase()); // } - if (derived != null && derived.length == 0) + if (derived != null && derived.length == 0) { derived = null; - + } if (derived != null) { StringUtil.toUpperCaseArray(derived, derived); } - if (derived != null && join == null) { throw new IllegalStateException("Derived can only be defined on lookup table, cube " + cubeDesc + ", " + this); } + } public boolean isDerived() { @@ -148,6 +148,7 @@ public class DimensionDesc { public String[] getDerived() { return derived; } + public void setDerived(String[] derived) { this.derived = derived; http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-cube/src/main/java/org/apache/kylin/cube/model/v3/CubeDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/v3/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/v3/CubeDesc.java index f6245cd..9467e9a 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/v3/CubeDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/v3/CubeDesc.java @@ -646,7 +646,7 @@ public class CubeDesc extends RootPersistentEntity { } FunctionDesc func = m.getFunction(); - func.init(factTable); + func.init(factTable, null); allColumns.addAll(func.getParameter().getColRefs()); func.getMeasureType().validate(func); @@ -704,14 +704,14 @@ public class CubeDesc extends RootPersistentEntity { measures = Lists.newArrayList(); } -// Collections.sort(measures, new Comparator<MeasureDesc>() { -// @Override -// public int compare(MeasureDesc m1, MeasureDesc m2) { -// Integer id1 = m1.getId(); -// Integer id2 = m2.getId(); -// return id1.compareTo(id2); -// } -// }); + // Collections.sort(measures, new Comparator<MeasureDesc>() { + // @Override + // public int compare(MeasureDesc m1, MeasureDesc m2) { + // Integer id1 = m1.getId(); + // Integer id2 = m2.getId(); + // return id1.compareTo(id2); + // } + // }); } private void sortHierarchiesByLevel(HierarchyDesc[] hierarchies) { http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java index 7e52ef1..bcb2caf 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java @@ -42,7 +42,6 @@ import java.util.Map; import javax.annotation.Nullable; import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter; import org.apache.kylin.common.util.ByteArray; import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.cube.CubeInstance; @@ -55,6 +54,7 @@ import org.apache.kylin.dict.DictionaryGenerator; import org.apache.kylin.dict.DictionaryInfo; import org.apache.kylin.dict.DictionaryManager; import org.apache.kylin.dict.IterableDictionaryValueEnumerator; +import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter; import org.apache.kylin.metadata.model.TblColRef; import org.apache.kylin.source.ReadableTable; import org.slf4j.Logger; @@ -149,7 +149,7 @@ public class CubingUtils { } public static Map<TblColRef, Dictionary<String>> buildDictionary(final CubeInstance cubeInstance, Iterable<List<String>> recordList) throws IOException { - final List<TblColRef> columnsNeedToBuildDictionary = cubeInstance.getDescriptor().listDimensionColumnsExcludingDerived(); + final List<TblColRef> columnsNeedToBuildDictionary = cubeInstance.getDescriptor().listDimensionColumnsExcludingDerived(true); final HashMap<Integer, TblColRef> tblColRefMap = Maps.newHashMap(); int index = 0; for (TblColRef column : columnsNeedToBuildDictionary) { http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java index 26cac81..4664593 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java @@ -146,6 +146,6 @@ abstract public class MeasureType<T> { int getNumOfRows(); /** Fill in specified row into tuple. */ - void fillTuplle(Tuple tuple, int row); + void fillTuple(Tuple tuple, int row); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java index 5e045e6..e58d82a 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.kylin.measure.basic.BasicMeasureType; import org.apache.kylin.measure.bitmap.BitmapMeasureType; +import org.apache.kylin.measure.extendedcolumn.ExtendedColumnMeasureType; import org.apache.kylin.measure.hllc.HLLCMeasureType; import org.apache.kylin.measure.topn.TopNMeasureType; import org.apache.kylin.metadata.datatype.DataType; @@ -94,6 +95,7 @@ abstract public class MeasureTypeFactory<T> { factoryInsts.add(new HLLCMeasureType.Factory()); factoryInsts.add(new BitmapMeasureType.Factory()); factoryInsts.add(new TopNMeasureType.Factory()); + factoryInsts.add(new ExtendedColumnMeasureType.Factory()); /* * Maybe do classpath search for more custom measure types? http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnMeasureType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnMeasureType.java new file mode 100644 index 0000000..ef7081c --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnMeasureType.java @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.extendedcolumn; + +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import org.apache.commons.io.Charsets; +import org.apache.kylin.common.util.ByteArray; +import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.measure.MeasureAggregator; +import org.apache.kylin.measure.MeasureIngester; +import org.apache.kylin.measure.MeasureType; +import org.apache.kylin.measure.MeasureTypeFactory; +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; +import org.apache.kylin.metadata.model.FunctionDesc; +import org.apache.kylin.metadata.model.MeasureDesc; +import org.apache.kylin.metadata.model.TblColRef; +import org.apache.kylin.metadata.realization.CapabilityResult; +import org.apache.kylin.metadata.realization.SQLDigest; +import org.apache.kylin.metadata.tuple.Tuple; +import org.apache.kylin.metadata.tuple.TupleInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Lists; + +public class ExtendedColumnMeasureType extends MeasureType<ByteArray> { + + private static final Logger logger = LoggerFactory.getLogger(ExtendedColumnMeasureType.class); + + public static final String FUNC_RAW = "EXTENDED_COLUMN"; + public static final String DATATYPE_RAW = "extendedcolumn"; + private final DataType dataType; + + public static class Factory extends MeasureTypeFactory<ByteArray> { + + @Override + public MeasureType<ByteArray> createMeasureType(String funcName, DataType dataType) { + return new ExtendedColumnMeasureType(funcName, dataType); + } + + @Override + public String getAggrFunctionName() { + return FUNC_RAW; + } + + @Override + public String getAggrDataTypeName() { + return DATATYPE_RAW; + } + + @Override + public Class<? extends DataTypeSerializer<ByteArray>> getAggrDataTypeSerializer() { + return ExtendedColumnSerializer.class; + } + } + + public ExtendedColumnMeasureType(String funcName, DataType dataType) { + this.dataType = dataType; + } + + public static List<TblColRef> getExtendedColumnHosts(FunctionDesc functionDesc) { + List<TblColRef> ret = Lists.newArrayList(); + List<TblColRef> params = functionDesc.getParameter().getColRefs(); + for (int i = 0; i < params.size() - 1; i++) { + ret.add(params.get(i)); + } + return ret; + } + + public static TblColRef getExtendedColumn(FunctionDesc functionDesc) { + List<TblColRef> params = functionDesc.getParameter().getColRefs(); + return params.get(params.size() - 1); + } + + @Override + public void adjustSqlDigest(MeasureDesc measureDesc, SQLDigest sqlDigest) { + FunctionDesc extendColumnFunc = measureDesc.getFunction(); + List<TblColRef> hosts = getExtendedColumnHosts(extendColumnFunc); + TblColRef extended = getExtendedColumn(extendColumnFunc); + + if (!sqlDigest.groupbyColumns.contains(extended)) { + return; + } + + sqlDigest.aggregations.add(extendColumnFunc); + sqlDigest.groupbyColumns.remove(extended); + sqlDigest.groupbyColumns.addAll(hosts); + sqlDigest.metricColumns.add(extended); + } + + @Override + public CapabilityResult.CapabilityInfluence influenceCapabilityCheck(Collection<TblColRef> unmatchedDimensions, Collection<FunctionDesc> unmatchedAggregations, SQLDigest digest, MeasureDesc measureDesc) { + TblColRef extendedCol = getExtendedColumn(measureDesc.getFunction()); + + if (!unmatchedDimensions.contains(extendedCol)) { + return null; + } + + if (digest.filterColumns.contains(extendedCol)) { + return null; + } + + unmatchedDimensions.remove(extendedCol); + + return new CapabilityResult.CapabilityInfluence() { + @Override + public double suggestCostMultiplier() { + return 0.9; + } + }; + } + + public boolean needAdvancedTupleFilling() { + return true; + } + + public IAdvMeasureFiller getAdvancedTupleFiller(FunctionDesc function, TupleInfo returnTupleInfo, Map<TblColRef, Dictionary<String>> dictionaryMap) { + final TblColRef extended = getExtendedColumn(function); + final int extendedColumnInTupleIdx = returnTupleInfo.hasColumn(extended) ? returnTupleInfo.getColumnIndex(extended) : -1; + + if (extendedColumnInTupleIdx == -1) { + throw new RuntimeException("Extended column is not required in returnTupleInfo"); + } + + return new IAdvMeasureFiller() { + private String value; + + @Override + public void reload(Object measureValue) { + ByteArray byteArray = (ByteArray) measureValue; + //the array in ByteArray is garanteed to be completed owned by the ByteArray + value = new String(byteArray.array(), Charsets.toCharset("UTF-8")); + } + + @Override + public int getNumOfRows() { + return 1; + } + + @Override + public void fillTuple(Tuple tuple, int row) { + tuple.setDimensionValue(extendedColumnInTupleIdx, value); + } + }; + } + + @Override + public MeasureIngester<ByteArray> newIngester() { + + return new MeasureIngester<ByteArray>() { + + public String truncateWhenUTF8(String s, int maxBytes) { + int b = 0; + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + + // ranges from http://en.wikipedia.org/wiki/UTF-8 + int skip = 0; + int more; + if (c <= 0x007f) { + more = 1; + } else if (c <= 0x07FF) { + more = 2; + } else if (c <= 0xd7ff) { + more = 3; + } else if (c <= 0xDFFF) { + // surrogate area, consume next char as well + more = 4; + skip = 1; + } else { + more = 3; + } + + if (b + more > maxBytes) { + return s.substring(0, i); + } + b += more; + i += skip; + } + return s; + } + + @Override + public ByteArray valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) { + if (values.length <= 1) + throw new IllegalArgumentException(); + + String literal = values[values.length - 1]; + if (literal == null) { + return new ByteArray(); + } + + byte[] bytes = literal.getBytes(); + if (bytes.length <= dataType.getPrecision()) { + return new ByteArray(bytes); + } else { + return new ByteArray(truncateWhenUTF8(literal, dataType.getPrecision()).getBytes()); + } + } + }; + } + + @Override + public MeasureAggregator<ByteArray> newAggregator() { + return new MeasureAggregator<ByteArray>() { + private ByteArray byteArray = null; + private boolean warned = false; + + @Override + public void reset() { + byteArray = null; + } + + @Override + public void aggregate(ByteArray value) { + if (byteArray == null) { + byteArray = value; + } else { + if (!byteArray.equals(value)) { + if (!warned) { + logger.warn("Extended column must be unique given same host column"); + warned = true; + } + } + } + } + + @Override + public ByteArray getState() { + return byteArray; + } + + @Override + public int getMemBytesEstimate() { + return dataType.getPrecision() / 2; + } + }; + } + + @Override + public boolean needRewrite() { + return false; + } + + @Override + public Class<?> getRewriteCalciteAggrFunctionClass() { + return null; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnSerializer.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnSerializer.java b/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnSerializer.java new file mode 100644 index 0000000..de87e0e --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnSerializer.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.extendedcolumn; + +import java.nio.ByteBuffer; + +import org.apache.kylin.common.util.ByteArray; +import org.apache.kylin.common.util.BytesUtil; +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; + +public class ExtendedColumnSerializer extends DataTypeSerializer<ByteArray> { + + private int extendedColumnSize; + private int maxLength; + + public ExtendedColumnSerializer(DataType dataType) { + this.extendedColumnSize = dataType.getPrecision(); + this.maxLength = this.extendedColumnSize + 4;//4 bytes for the length preamble + } + + @Override + public int peekLength(ByteBuffer in) { + int mark = in.position(); + int size = BytesUtil.readVInt(in); + int total = in.position() - mark; + if (size >= 0) { + //size <0 is the null case + total += size; + } + in.position(mark); + return total; + } + + @Override + public int maxLength() { + return maxLength; + } + + @Override + public int getStorageBytesEstimate() { + return extendedColumnSize / 2; + } + + @Override + public void serialize(ByteArray value, ByteBuffer out) { + if (value != null && value.array() != null) { + BytesUtil.writeByteArray(value.array(), value.offset(), value.length(), out); + } else { + BytesUtil.writeByteArray(null, out); + } + } + + @Override + public ByteArray deserialize(ByteBuffer in) { + //the array in ByteArray is garanteed to be completed owned by the ByteArray + return new ByteArray(BytesUtil.readByteArray(in)); + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java index 0f79c1d..0373b07 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java @@ -287,7 +287,7 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { } @Override - public void fillTuplle(Tuple tuple, int row) { + public void fillTuple(Tuple tuple, int row) { if (expectRow++ != row) throw new IllegalStateException(); http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-metadata/src/main/java/org/apache/kylin/metadata/model/DataModelDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/DataModelDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/DataModelDesc.java index 1fb96b7..1647707 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/DataModelDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/DataModelDesc.java @@ -25,6 +25,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import com.google.common.collect.Lists; import org.apache.commons.lang.ArrayUtils; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.RootPersistentEntity; @@ -71,6 +72,7 @@ public class DataModelDesc extends RootPersistentEntity { private RealizationCapacity capacity = RealizationCapacity.MEDIUM; private TableDesc factTableDesc; + private List<TableDesc> lookupTableDescs = Lists.newArrayList(); /** * Error messages during resolving json metadata @@ -109,6 +111,10 @@ public class DataModelDesc extends RootPersistentEntity { return factTableDesc; } + public List<TableDesc> getLookupTableDescs() { + return lookupTableDescs; + } + public void setFactTable(String factTable) { this.factTable = factTable.toUpperCase(); } @@ -201,6 +207,7 @@ public class DataModelDesc extends RootPersistentEntity { if (dimTable == null) { throw new IllegalStateException("Table " + lookup.getTable() + " does not exist for " + this); } + this.lookupTableDescs.add(dimTable); JoinDesc join = lookup.getJoin(); if (join == null) @@ -250,8 +257,7 @@ public class DataModelDesc extends RootPersistentEntity { } } - /** - * Add error info and thrown exception out + /** * Add error info and thrown exception out * * @param message */ http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java index 36c8722..9e3effb 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java @@ -20,6 +20,7 @@ package org.apache.kylin.metadata.model; import java.util.ArrayList; import java.util.Collection; +import java.util.List; import org.apache.kylin.measure.MeasureType; import org.apache.kylin.measure.MeasureTypeFactory; @@ -55,7 +56,7 @@ public class FunctionDesc { private MeasureType<?> measureType; private boolean isDimensionAsMetric = false; - public void init(TableDesc factTable) { + public void init(TableDesc factTable, List<TableDesc> lookupTables) { expression = expression.toUpperCase(); returnDataType = DataType.getType(returnType); @@ -66,14 +67,28 @@ public class FunctionDesc { ArrayList<TblColRef> colRefs = Lists.newArrayList(); for (ParameterDesc p = parameter; p != null; p = p.getNextParameter()) { if (p.isColumnType()) { - ColumnDesc sourceColumn = factTable.findColumnByName(p.getValue()); + ColumnDesc sourceColumn = findColumn(factTable,lookupTables,p.getValue()); TblColRef colRef = new TblColRef(sourceColumn); colRefs.add(colRef); } } parameter.setColRefs(colRefs); + } + + private ColumnDesc findColumn(TableDesc factTable, List<TableDesc> lookups, String columnName) { + ColumnDesc ret = factTable.findColumnByName(columnName); + if (ret != null) { + return ret; + } + for (TableDesc lookup : lookups) { + ret = lookup.findColumnByName(columnName); + if (ret != null) { + return ret; + } + } + throw new IllegalStateException("Column is not found in any table from the model: " + columnName); } public MeasureType<?> getMeasureType() { http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/core-metadata/src/test/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnSerializerTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnSerializerTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnSerializerTest.java new file mode 100644 index 0000000..dd73369 --- /dev/null +++ b/core-metadata/src/test/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnSerializerTest.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.extendedcolumn; + +import java.nio.ByteBuffer; + +import org.apache.commons.lang3.StringUtils; +import org.apache.kylin.common.util.ByteArray; +import org.apache.kylin.measure.MeasureIngester; +import org.apache.kylin.measure.MeasureType; +import org.apache.kylin.measure.MeasureTypeFactory; +import org.apache.kylin.metadata.datatype.DataType; +import org.junit.Assert; +import org.junit.Test; + +public class ExtendedColumnSerializerTest { + private static MeasureType<ByteArray> measureType; + static { + measureType = (MeasureType<ByteArray>) MeasureTypeFactory.create("EXTENDED_COLUMN", "extendedcolumn(20)"); + } + + @Test + public void testSerDesNull() { + ExtendedColumnSerializer serializer = new ExtendedColumnSerializer(DataType.getType("extendedcolumn(20)")); + MeasureIngester<ByteArray> ingester = measureType.newIngester(); + ByteArray array = ingester.valueOf(new String[] { null }, null, null); + Assert.assertTrue(new ByteArray().equals(array)); + + ByteBuffer buffer = ByteBuffer.allocate(serializer.maxLength()); + serializer.serialize(array, buffer); + buffer.flip(); + int length = serializer.peekLength(buffer); + Assert.assertTrue(length == 1); + ByteArray des = serializer.deserialize(buffer); + Assert.assertTrue(new ByteArray().equals(des)); + } + + @Test + public void testNormal() { + String text = StringUtils.repeat("h", 20); + + ExtendedColumnSerializer serializer = new ExtendedColumnSerializer(DataType.getType("extendedcolumn(20)")); + MeasureIngester<ByteArray> ingester = measureType.newIngester(); + ByteArray array = ingester.valueOf(new String[] { text }, null, null); + + ByteBuffer buffer = ByteBuffer.allocate(serializer.maxLength()); + serializer.serialize(array, buffer); + buffer.flip(); + ByteArray des = serializer.deserialize(buffer); + Assert.assertTrue(new ByteArray(text.getBytes()).equals(des)); + } + + @Test + public void testOverflow() { + String text = StringUtils.repeat("h", 21); + ExtendedColumnSerializer serializer = new ExtendedColumnSerializer(DataType.getType("extendedcolumn(20)")); + MeasureIngester<ByteArray> ingester = measureType.newIngester(); + ByteArray array = ingester.valueOf(new String[] { text }, null, null); + + ByteBuffer buffer = ByteBuffer.allocate(serializer.maxLength()); + serializer.serialize(array, buffer); + buffer.flip(); + ByteArray des = serializer.deserialize(buffer); + Assert.assertTrue(new ByteArray(StringUtils.repeat("h", 20).getBytes()).equals(des)); + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json ---------------------------------------------------------------------- diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json index 8cbb0e3..4d44feb 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json @@ -1,169 +1,282 @@ { - "uuid" : "9ac9b7a8-3929-4dff-b59d-2100aadc8dbf", - "name" : "test_kylin_cube_without_slr_desc", - "description" : null, - "dimensions" : [ { - "name" : "CAL_DT", - "table" : "EDW.TEST_CAL_DT", - "column" : "{FK}", - "derived" : [ "WEEK_BEG_DT" ] - }, { - "name" : "CATEGORY", - "table" : "DEFAULT.TEST_CATEGORY_GROUPINGS", - "column" : "{FK}", - "derived" : [ "USER_DEFINED_FIELD1", "USER_DEFINED_FIELD3", "UPD_DATE", "UPD_USER" ] - }, { - "name" : "CATEGORY_HIERARCHY", - "table" : "DEFAULT.TEST_CATEGORY_GROUPINGS", - "column" : "META_CATEG_NAME", - "derived" : null - }, { - "name" : "CATEGORY_HIERARCHY", - "table" : "DEFAULT.TEST_CATEGORY_GROUPINGS", - "column" : "CATEG_LVL2_NAME", - "derived" : null - }, { - "name" : "CATEGORY_HIERARCHY", - "table" : "DEFAULT.TEST_CATEGORY_GROUPINGS", - "column" : "CATEG_LVL3_NAME", - "derived" : null - }, { - "name" : "LSTG_FORMAT_NAME", - "table" : "DEFAULT.TEST_KYLIN_FACT", - "column" : "LSTG_FORMAT_NAME", - "derived" : null - }, { - "name" : "SITE_ID", - "table" : "EDW.TEST_SITES", - "column" : "{FK}", - "derived" : [ "SITE_NAME", "CRE_USER" ] - }, { - "name" : "SELLER_TYPE_CD", - "table" : "EDW.TEST_SELLER_TYPE_DIM", - "column" : "{FK}", - "derived" : [ "SELLER_TYPE_DESC" ] - } ], - "measures" : [ { - "name" : "GMV_SUM", - "function" : { - "expression" : "SUM", - "parameter" : { - "type" : "column", - "value" : "PRICE", - "next_parameter" : null + "uuid": "9ac9b7a8-3929-4dff-b59d-2100aadc8dbf", + "name": "test_kylin_cube_without_slr_desc", + "description": null, + "dimensions": [ + { + "name": "CAL_DT", + "table": "EDW.TEST_CAL_DT", + "column": "{FK}", + "derived": [ + "WEEK_BEG_DT" + ] + }, + { + "name": "CATEGORY", + "table": "DEFAULT.TEST_CATEGORY_GROUPINGS", + "column": "{FK}", + "derived": [ + "USER_DEFINED_FIELD1", + "USER_DEFINED_FIELD3", + "UPD_DATE", + "UPD_USER" + ] + }, + { + "name": "CATEGORY_HIERARCHY", + "table": "DEFAULT.TEST_CATEGORY_GROUPINGS", + "column": "META_CATEG_NAME", + "derived": null + }, + { + "name": "CATEGORY_HIERARCHY", + "table": "DEFAULT.TEST_CATEGORY_GROUPINGS", + "column": "CATEG_LVL2_NAME", + "derived": null + }, + { + "name": "CATEGORY_HIERARCHY", + "table": "DEFAULT.TEST_CATEGORY_GROUPINGS", + "column": "CATEG_LVL3_NAME", + "derived": null + }, + { + "name": "LSTG_FORMAT_NAME", + "table": "DEFAULT.TEST_KYLIN_FACT", + "column": "LSTG_FORMAT_NAME", + "derived": null + }, + { + "name": "SITE_ID", + "table": "EDW.TEST_SITES", + "column": "{FK}" + }, + { + "name": "SELLER_TYPE_CD", + "table": "EDW.TEST_SELLER_TYPE_DIM", + "column": "{FK}", + "derived": [ + "SELLER_TYPE_DESC" + ] + } + ], + "measures": [ + { + "name": "GMV_SUM", + "function": { + "expression": "SUM", + "parameter": { + "type": "column", + "value": "PRICE", + "next_parameter": null + }, + "returntype": "decimal(19,4)" }, - "returntype" : "decimal(19,4)" + "dependent_measure_ref": null }, - "dependent_measure_ref" : null - }, { - "name" : "GMV_MIN", - "function" : { - "expression" : "MIN", - "parameter" : { - "type" : "column", - "value" : "PRICE", - "next_parameter" : null + { + "name": "GMV_MIN", + "function": { + "expression": "MIN", + "parameter": { + "type": "column", + "value": "PRICE", + "next_parameter": null + }, + "returntype": "decimal(19,4)" }, - "returntype" : "decimal(19,4)" + "dependent_measure_ref": null }, - "dependent_measure_ref" : null - }, { - "name" : "GMV_MAX", - "function" : { - "expression" : "MAX", - "parameter" : { - "type" : "column", - "value" : "PRICE", - "next_parameter" : null + { + "name": "GMV_MAX", + "function": { + "expression": "MAX", + "parameter": { + "type": "column", + "value": "PRICE", + "next_parameter": null + }, + "returntype": "decimal(19,4)" }, - "returntype" : "decimal(19,4)" + "dependent_measure_ref": null }, - "dependent_measure_ref" : null - }, { - "name" : "TRANS_CNT", - "function" : { - "expression" : "COUNT", - "parameter" : { - "type" : "constant", - "value" : "1", - "next_parameter" : null + { + "name": "TRANS_CNT", + "function": { + "expression": "COUNT", + "parameter": { + "type": "constant", + "value": "1", + "next_parameter": null + }, + "returntype": "bigint" }, - "returntype" : "bigint" + "dependent_measure_ref": null }, - "dependent_measure_ref" : null - }, { - "name" : "ITEM_COUNT_SUM", - "function" : { - "expression" : "SUM", - "parameter" : { - "type" : "column", - "value" : "ITEM_COUNT", - "next_parameter" : null + { + "name": "ITEM_COUNT_SUM", + "function": { + "expression": "SUM", + "parameter": { + "type": "column", + "value": "ITEM_COUNT", + "next_parameter": null + }, + "returntype": "bigint" }, - "returntype" : "bigint" + "dependent_measure_ref": null }, - "dependent_measure_ref" : null - } ], - "rowkey" : { - "rowkey_columns" : [ { - "column" : "cal_dt", - "encoding" : "dict" - }, { - "column" : "leaf_categ_id", - "encoding" : "dict" - }, { - "column" : "meta_categ_name", - "encoding" : "dict" - }, { - "column" : "categ_lvl2_name", - "encoding" : "dict" - }, { - "column" : "categ_lvl3_name", - "encoding" : "dict" - }, { - "column" : "lstg_format_name", - "encoding" : "fixed_length:12" - }, { - "column" : "lstg_site_id", - "encoding" : "dict" - }, { - "column" : "slr_segment_cd", - "encoding" : "dict" - } ] + { + "name": "SITE_EXTENDED_1", + "function": { + "expression": "EXTENDED_COLUMN", + "parameter": { + "type": "column", + "value": "LSTG_SITE_ID", + "next_parameter": { + "type": "column", + "value": "SITE_NAME", + "next_parameter": null + } + }, + "returntype": "extendedcolumn(100)" + }, + "dependent_measure_ref": null + }, + { + "name": "SITE_EXTENDED_2", + "function": { + "expression": "EXTENDED_COLUMN", + "parameter": { + "type": "column", + "value": "LSTG_SITE_ID", + "next_parameter": { + "type": "column", + "value": "CRE_USER", + "next_parameter": null + } + }, + "returntype": "extendedcolumn(100)" + }, + "dependent_measure_ref": null + } + ], + "rowkey": { + "rowkey_columns": [ + { + "column": "cal_dt", + "encoding": "dict" + }, + { + "column": "leaf_categ_id", + "encoding": "dict" + }, + { + "column": "meta_categ_name", + "encoding": "dict" + }, + { + "column": "categ_lvl2_name", + "encoding": "dict" + }, + { + "column": "categ_lvl3_name", + "encoding": "dict" + }, + { + "column": "lstg_format_name", + "encoding": "fixed_length:12" + }, + { + "column": "lstg_site_id", + "encoding": "dict" + }, + { + "column": "slr_segment_cd", + "encoding": "dict" + } + ] }, - "signature" : null, - "last_modified" : 1448959801307, - "model_name" : "test_kylin_inner_join_model_desc", - "null_string" : null, - "hbase_mapping" : { - "column_family" : [ { - "name" : "f1", - "columns" : [ { - "qualifier" : "m", - "measure_refs" : [ "gmv_sum", "gmv_min", "gmv_max", "trans_cnt", "item_count_sum" ] - } ] - } ] + "signature": null, + "last_modified": 1448959801307, + "model_name": "test_kylin_inner_join_model_desc", + "null_string": null, + "hbase_mapping": { + "column_family": [ + { + "name": "f1", + "columns": [ + { + "qualifier": "m", + "measure_refs": [ + "gmv_sum", + "gmv_min", + "gmv_max", + "trans_cnt", + "item_count_sum", + "SITE_EXTENDED_1", + "SITE_EXTENDED_2" + ] + } + ] + } + ] }, - "aggregation_groups" : [ { - "includes" : [ "cal_dt", "categ_lvl2_name", "categ_lvl3_name", "leaf_categ_id", "lstg_format_name", "lstg_site_id", "meta_categ_name", "slr_segment_cd" ], - "select_rule" : { - "hierarchy_dims" : [ ], - "mandatory_dims" : [ "cal_dt" ], - "joint_dims" : [ [ "categ_lvl2_name", "categ_lvl3_name", "leaf_categ_id", "meta_categ_name" ] ] - } - }, { - "includes" : [ "cal_dt", "categ_lvl2_name", "categ_lvl3_name", "leaf_categ_id", "meta_categ_name" ], - "select_rule" : { - "hierarchy_dims" : [ [ "META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME" ] ], - "mandatory_dims" : [ "cal_dt" ], - "joint_dims" : [ ] + "aggregation_groups": [ + { + "includes": [ + "cal_dt", + "categ_lvl2_name", + "categ_lvl3_name", + "leaf_categ_id", + "lstg_format_name", + "lstg_site_id", + "meta_categ_name", + "slr_segment_cd" + ], + "select_rule": { + "hierarchy_dims": [], + "mandatory_dims": [ + "cal_dt" + ], + "joint_dims": [ + [ + "categ_lvl2_name", + "categ_lvl3_name", + "leaf_categ_id", + "meta_categ_name" + ] + ] + } + }, + { + "includes": [ + "cal_dt", + "categ_lvl2_name", + "categ_lvl3_name", + "leaf_categ_id", + "meta_categ_name" + ], + "select_rule": { + "hierarchy_dims": [ + [ + "META_CATEG_NAME", + "CATEG_LVL2_NAME", + "CATEG_LVL3_NAME" + ] + ], + "mandatory_dims": [ + "cal_dt" + ], + "joint_dims": [] + } } - } ], - "notify_list" : null, - "status_need_notify" : [ ], - "auto_merge_time_ranges" : null, - "retention_range" : 0, - "engine_type" : 2, - "storage_type" : 0, - "partition_date_start" : 0 + ], + "notify_list": null, + "status_need_notify": [], + "auto_merge_time_ranges": null, + "retention_range": 0, + "engine_type": 2, + "storage_type": 0, + "partition_date_start": 0 } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/examples/test_case_data/sandbox/kylin.properties ---------------------------------------------------------------------- diff --git a/examples/test_case_data/sandbox/kylin.properties b/examples/test_case_data/sandbox/kylin.properties index bf161fc..ccdc3a1 100644 --- a/examples/test_case_data/sandbox/kylin.properties +++ b/examples/test_case_data/sandbox/kylin.properties @@ -24,7 +24,7 @@ kylin.job.mapreduce.default.reduce.input.mb=500 # If true, job engine will not assume that hadoop CLI reside on the same server as it self # you will have to specify kylin.job.remote.cli.hostname, kylin.job.remote.cli.username and kylin.job.remote.cli.password -kylin.job.run.as.remote.cmd=false +kylin.job.run.as.remote.cmd=true # Only necessary when kylin.job.run.as.remote.cmd=true kylin.job.remote.cli.hostname=sandbox http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/kylin-it/src/test/java/org/apache/kylin/provision/BuildCubeWithEngine.java ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/java/org/apache/kylin/provision/BuildCubeWithEngine.java b/kylin-it/src/test/java/org/apache/kylin/provision/BuildCubeWithEngine.java index f44fc38..28808df 100644 --- a/kylin-it/src/test/java/org/apache/kylin/provision/BuildCubeWithEngine.java +++ b/kylin-it/src/test/java/org/apache/kylin/provision/BuildCubeWithEngine.java @@ -71,6 +71,7 @@ public class BuildCubeWithEngine { public static void main(String[] args) throws Exception { beforeClass(); + BuildCubeWithEngine buildCubeWithEngine = new BuildCubeWithEngine(); buildCubeWithEngine.before(); buildCubeWithEngine.build(); @@ -98,7 +99,7 @@ public class BuildCubeWithEngine { } HBaseMetadataTestCase.staticCreateTestMetadata(AbstractKylinTestCase.SANDBOX_TEST_DATA); - + try { //check hdfs permission Configuration hconf = HadoopUtil.getCurrentConfiguration(); http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java b/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java index ace861a..89a9740 100644 --- a/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java +++ b/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java @@ -45,7 +45,7 @@ public class ITKylinQueryTest extends KylinTestBase { @BeforeClass public static void setUp() throws Exception { - printInfo("setUp in KylinQueryTest"); + printInfo("setU in KylinQueryTest"); joinType = "left"; setupAll(); http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeSegmentTupleIterator.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeSegmentTupleIterator.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeSegmentTupleIterator.java index 1232cb2..5e842f7 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeSegmentTupleIterator.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeSegmentTupleIterator.java @@ -123,7 +123,7 @@ public class CubeSegmentTupleIterator implements ITupleIterator { // consume any left rows from advanced measure filler if (advMeasureRowsRemaining > 0) { for (MeasureType.IAdvMeasureFiller filler : advMeasureFillers) { - filler.fillTuplle(oneTuple, advMeasureRowIndex); + filler.fillTuple(oneTuple, advMeasureRowIndex); } advMeasureRowIndex++; advMeasureRowsRemaining--; http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java index 74d57c7..1b8b586 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java @@ -117,7 +117,6 @@ public class CubeStorageQuery implements ICachableStorageQuery { Set<TblColRef> groupsD = expandDerived(groups, derivedPostAggregation); Set<TblColRef> othersD = expandDerived(others, derivedPostAggregation); othersD.removeAll(groupsD); - derivedPostAggregation.removeAll(groups); // identify cuboid Set<TblColRef> dimensionsD = Sets.newHashSet(); @@ -128,7 +127,7 @@ public class CubeStorageQuery implements ICachableStorageQuery { // isExactAggregation? meaning: tuples returned from storage requires no further aggregation in query engine Set<TblColRef> singleValuesD = findSingleValueColumns(filter); - boolean isExactAggregation = isExactAggregation(cuboid, groups, othersD, singleValuesD, derivedPostAggregation); + boolean isExactAggregation = isExactAggregation(cuboid, groupsD, othersD, singleValuesD, derivedPostAggregation); context.setExactAggregation(isExactAggregation); // translate filter for scan range and compose returning groups for coprocessor, note: @@ -232,7 +231,7 @@ public class CubeStorageQuery implements ICachableStorageQuery { private Set<TblColRef> expandDerived(Collection<TblColRef> cols, Set<TblColRef> derivedPostAggregation) { Set<TblColRef> expanded = Sets.newHashSet(); for (TblColRef col : cols) { - if (cubeDesc.isDerived(col)) { + if (cubeDesc.hasHostColumn(col)) { DeriveInfo hostInfo = cubeDesc.getHostInfo(col); for (TblColRef hostCol : hostInfo.columns) { expanded.add(hostCol); @@ -271,6 +270,10 @@ public class CubeStorageQuery implements ICachableStorageQuery { // expand derived Set<TblColRef> resultD = Sets.newHashSet(); for (TblColRef col : result) { + if (cubeDesc.isExtendedColumn(col)) { + throw new CubeDesc.CannotFilterExtendedColumnException(col); + } + if (cubeDesc.isDerived(col)) { DeriveInfo hostInfo = cubeDesc.getHostInfo(col); if (hostInfo.isOneToOne) { @@ -311,6 +314,10 @@ public class CubeStorageQuery implements ICachableStorageQuery { } private void collectColumns(TblColRef col, Set<TblColRef> collector) { + if (cubeDesc.isExtendedColumn(col)) { + throw new CubeDesc.CannotFilterExtendedColumnException(col); + } + if (cubeDesc.isDerived(col)) { DeriveInfo hostInfo = cubeDesc.getHostInfo(col); for (TblColRef h : hostInfo.columns) @@ -358,8 +365,12 @@ public class CubeStorageQuery implements ICachableStorageQuery { return compf; TblColRef derived = compf.getColumn(); - if (cubeDesc.isDerived(derived) == false) + if (cubeDesc.isExtendedColumn(derived)) { + throw new CubeDesc.CannotFilterExtendedColumnException(derived); + } + if (cubeDesc.isDerived(derived) == false) { return compf; + } DeriveInfo hostInfo = cubeDesc.getHostInfo(derived); CubeManager cubeMgr = CubeManager.getInstance(this.cubeInstance.getConfig()); http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeTupleConverter.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeTupleConverter.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeTupleConverter.java index b43a616..4f674cb 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeTupleConverter.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeTupleConverter.java @@ -148,6 +148,7 @@ public class CubeTupleConverter { } // measures + int index = 0; for (int i = 0; i < rowValueDecoders.size(); i++) { RowValueDecoder rowValueDecoder = rowValueDecoders.get(i); rowValueDecoder.decodeAndConvertJavaObj(hbaseRow); @@ -156,7 +157,7 @@ public class CubeTupleConverter { int[] measureIdx = metricsMeasureIdx[i]; int[] tupleIdx = metricsTupleIdx[i]; for (int j = 0; j < measureIdx.length; j++) { - if (measureTypes.get(j) != null) { + if (measureTypes.get(index++) != null) { tuple.setMeasureValue(tupleIdx[j], measureValues[measureIdx[j]]); } } @@ -250,5 +251,4 @@ public class CubeTupleConverter { throw new IllegalArgumentException(); } } - } http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java index aa02036..abfb74d 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java @@ -145,7 +145,7 @@ public class CubeSegmentScanner implements IGTScanner { private Set<TblColRef> replaceDerivedColumns(Set<TblColRef> input, CubeDesc cubeDesc) { Set<TblColRef> ret = Sets.newHashSet(); for (TblColRef col : input) { - if (cubeDesc.isDerived(col)) { + if (cubeDesc.hasHostColumn(col)) { for (TblColRef host : cubeDesc.getHostInfo(col).columns) { ret.add(host); } http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeStorageQuery.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeStorageQuery.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeStorageQuery.java index a61e5c3..ab8c80f 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeStorageQuery.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeStorageQuery.java @@ -90,7 +90,7 @@ public class CubeStorageQuery implements ICachableStorageQuery { //actually even if the threshold is set, it will not be used in this query engine setThreshold(dimensionsD, metrics, context); // set cautious threshold to prevent out of memory - + setLimit(filter, context); List<CubeSegmentScanner> scanners = Lists.newArrayList(); @@ -142,7 +142,7 @@ public class CubeStorageQuery implements ICachableStorageQuery { private Set<TblColRef> expandDerived(Collection<TblColRef> cols, Set<TblColRef> derivedPostAggregation) { Set<TblColRef> expanded = Sets.newHashSet(); for (TblColRef col : cols) { - if (cubeDesc.isDerived(col)) { + if (cubeDesc.hasHostColumn(col)) { DeriveInfo hostInfo = cubeDesc.getHostInfo(col); for (TblColRef hostCol : hostInfo.columns) { expanded.add(hostCol); @@ -195,6 +195,9 @@ public class CubeStorageQuery implements ICachableStorageQuery { // expand derived Set<TblColRef> resultD = Sets.newHashSet(); for (TblColRef col : result) { + if (cubeDesc.isExtendedColumn(col)) { + throw new CubeDesc.CannotFilterExtendedColumnException(col); + } if (cubeDesc.isDerived(col)) { DeriveInfo hostInfo = cubeDesc.getHostInfo(col); if (hostInfo.isOneToOne) { @@ -275,6 +278,9 @@ public class CubeStorageQuery implements ICachableStorageQuery { return compf; TblColRef derived = compf.getColumn(); + if (cubeDesc.isExtendedColumn(derived)) { + throw new CubeDesc.CannotFilterExtendedColumnException(derived); + } if (cubeDesc.isDerived(derived) == false) return compf; @@ -304,6 +310,9 @@ public class CubeStorageQuery implements ICachableStorageQuery { } private void collectColumns(TblColRef col, Set<TblColRef> collector) { + if (cubeDesc.isExtendedColumn(col)) { + throw new CubeDesc.CannotFilterExtendedColumnException(col); + } if (cubeDesc.isDerived(col)) { DeriveInfo hostInfo = cubeDesc.getHostInfo(col); for (TblColRef h : hostInfo.columns) @@ -355,7 +364,7 @@ public class CubeStorageQuery implements ICachableStorageQuery { measureType.adjustSqlDigest(measure, sqlDigest); } } - + // ============================================================================ @Override http://git-wip-us.apache.org/repos/asf/kylin/blob/82c6d588/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/SequentialCubeTupleIterator.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/SequentialCubeTupleIterator.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/SequentialCubeTupleIterator.java index f0cebfe..023b5f8 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/SequentialCubeTupleIterator.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/SequentialCubeTupleIterator.java @@ -62,7 +62,7 @@ public class SequentialCubeTupleIterator implements ITupleIterator { // consume any left rows from advanced measure filler if (advMeasureRowsRemaining > 0) { for (IAdvMeasureFiller filler : advMeasureFillers) { - filler.fillTuplle(tuple, advMeasureRowIndex); + filler.fillTuple(tuple, advMeasureRowIndex); } advMeasureRowIndex++; advMeasureRowsRemaining--;