APACHE-KYLIN-2725: Introduce a tool for creating system cubes relating to query & job metrics
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/74e167f0 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/74e167f0 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/74e167f0 Branch: refs/heads/yaho-cube-planner Commit: 74e167f090bed8537858cb44e4266f0427a0d3c7 Parents: ec6eb0e Author: Zhong <nju_y...@apache.org> Authored: Sun Aug 13 20:24:19 2017 +0800 Committer: Zhong <nju_y...@apache.org> Committed: Sun Aug 13 20:24:19 2017 +0800 ---------------------------------------------------------------------- .../kylin/metadata/model/FunctionDesc.java | 8 + .../kylin/metadata/model/IStorageAware.java | 1 + .../kylin/metadata/model/ParameterDesc.java | 6 +- .../kylin/metadata/model/PartitionDesc.java | 2 +- .../org/apache/kylin/metrics/lib/SinkTool.java | 32 + .../kylin/metrics/property/JobPropertyEnum.java | 2 +- .../metrics/property/QueryPropertyEnum.java | 2 +- tool/pom.xml | 5 + .../metrics/systemcube/CubeDescCreator.java | 636 +++++++++++++++++++ .../metrics/systemcube/CubeInstanceCreator.java | 88 +++ .../metrics/systemcube/HiveTableCreator.java | 278 ++++++++ .../metrics/systemcube/KylinTableCreator.java | 113 ++++ .../tool/metrics/systemcube/ModelCreator.java | 263 ++++++++ .../tool/metrics/systemcube/ProjectCreator.java | 101 +++ .../tool/metrics/systemcube/SCCreator.java | 292 +++++++++ .../metrics/systemcube/StreamingCreator.java | 138 ++++ .../metrics/systemcube/util/HiveSinkTool.java | 61 ++ .../metrics/systemcube/util/KafkaSinkTool.java | 32 + .../metrics/systemcube/util/StreamSinkTool.java | 74 +++ tool/src/main/resources/SCSinkTools.json | 14 + .../tool/metrics/systemcube/SCCreatorTest.java | 91 +++ tool/src/test/resources/SCSinkTools.json | 14 + 22 files changed, 2249 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java index e969be2..c902e6f 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java @@ -237,6 +237,14 @@ public class FunctionDesc implements Serializable { return parameter; } + public void setParameter(ParameterDesc parameter) { + this.parameter = parameter; + } + + public void setExpression(String expression) { + this.expression = expression; + } + public int getParameterCount() { int count = 0; for (ParameterDesc p = parameter; p != null; p = p.getNextParameter()) { http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/core-metadata/src/main/java/org/apache/kylin/metadata/model/IStorageAware.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/IStorageAware.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/IStorageAware.java index e552574..2f26862 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/IStorageAware.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/IStorageAware.java @@ -23,6 +23,7 @@ public interface IStorageAware { public static final int ID_HBASE = 0; public static final int ID_HYBRID = 1; public static final int ID_SHARDED_HBASE = 2; + public static final int ID_STREAM = 3; int getStorageType(); } http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/core-metadata/src/main/java/org/apache/kylin/metadata/model/ParameterDesc.java ---------------------------------------------------------------------- 
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ParameterDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ParameterDesc.java index 930dc02..f757503 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ParameterDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ParameterDesc.java @@ -85,6 +85,10 @@ public class ParameterDesc implements Serializable { return type; } + public void setType(String type) { + this.type = type; + } + public byte[] getBytes() throws UnsupportedEncodingException { return value.getBytes("UTF-8"); } @@ -93,7 +97,7 @@ public class ParameterDesc implements Serializable { return value; } - void setValue(String value) { + public void setValue(String value) { this.value = value; } http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/core-metadata/src/main/java/org/apache/kylin/metadata/model/PartitionDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/PartitionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/PartitionDesc.java index 3c00149..7881f9a 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/PartitionDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/PartitionDesc.java @@ -116,7 +116,7 @@ public class PartitionDesc implements Serializable { } // for test - void setPartitionTimeColumn(String partitionTimeColumn) { + public void setPartitionTimeColumn(String partitionTimeColumn) { this.partitionTimeColumn = partitionTimeColumn; } http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/core-metrics/src/main/java/org/apache/kylin/metrics/lib/SinkTool.java ---------------------------------------------------------------------- diff --git a/core-metrics/src/main/java/org/apache/kylin/metrics/lib/SinkTool.java 
b/core-metrics/src/main/java/org/apache/kylin/metrics/lib/SinkTool.java new file mode 100644 index 0000000..b55516a --- /dev/null +++ b/core-metrics/src/main/java/org/apache/kylin/metrics/lib/SinkTool.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.kylin.metrics.lib; + +import java.io.Serializable; +import java.util.Map; + +public interface SinkTool extends Serializable { + int getStorageType(); + + int getSourceType(); + + String getTableNameForMetrics(String subject); + + Map<String, String> getCubeDescOverrideProperties(); +} http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/core-metrics/src/main/java/org/apache/kylin/metrics/property/JobPropertyEnum.java ---------------------------------------------------------------------- diff --git a/core-metrics/src/main/java/org/apache/kylin/metrics/property/JobPropertyEnum.java b/core-metrics/src/main/java/org/apache/kylin/metrics/property/JobPropertyEnum.java index bbe987a..64d13ac 100644 --- a/core-metrics/src/main/java/org/apache/kylin/metrics/property/JobPropertyEnum.java +++ b/core-metrics/src/main/java/org/apache/kylin/metrics/property/JobPropertyEnum.java @@ -21,7 +21,7 @@ package org.apache.kylin.metrics.property; import com.google.common.base.Strings; public enum JobPropertyEnum { - ID_CODE("JOB_ID"), USER("USER"), PROJECT("PROJECT"), CUBE("CUBE_NAME"), TYPE("JOB_TYPE"), ALGORITHM( + ID_CODE("JOB_ID"), USER("KUSER"), PROJECT("PROJECT"), CUBE("CUBE_NAME"), TYPE("JOB_TYPE"), ALGORITHM( "CUBING_TYPE"), STATUS("JOB_STATUS"), EXCEPTION("EXCEPTION"), // SOURCE_SIZE("TABLE_SIZE"), CUBE_SIZE("CUBE_SIZE"), BUILD_DURATION("DURATION"), WAIT_RESOURCE_TIME( "WAIT_RESOURCE_TIME"), PER_BYTES_TIME_COST("PER_BYTES_TIME_COST"), STEP_DURATION_DISTINCT_COLUMNS( http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/core-metrics/src/main/java/org/apache/kylin/metrics/property/QueryPropertyEnum.java ---------------------------------------------------------------------- diff --git a/core-metrics/src/main/java/org/apache/kylin/metrics/property/QueryPropertyEnum.java b/core-metrics/src/main/java/org/apache/kylin/metrics/property/QueryPropertyEnum.java index 6fe5b0f..3f016b0 100644 --- 
a/core-metrics/src/main/java/org/apache/kylin/metrics/property/QueryPropertyEnum.java +++ b/core-metrics/src/main/java/org/apache/kylin/metrics/property/QueryPropertyEnum.java @@ -21,7 +21,7 @@ package org.apache.kylin.metrics.property; import com.google.common.base.Strings; public enum QueryPropertyEnum { - ID_CODE("QUERY_HASH_CODE"), TYPE("QUERY_TYPE"), USER("USER"), PROJECT("PROJECT"), REALIZATION( + ID_CODE("QUERY_HASH_CODE"), TYPE("QUERY_TYPE"), USER("KUSER"), PROJECT("PROJECT"), REALIZATION( "REALIZATION"), REALIZATION_TYPE("REALIZATION_TYPE"), EXCEPTION("EXCEPTION"), // TIME_COST("QUERY_TIME_COST"), CALCITE_RETURN_COUNT("CALCITE_COUNT_RETURN"), STORAGE_RETURN_COUNT( "STORAGE_COUNT_RETURN"), AGGR_FILTER_COUNT("CALCITE_COUNT_AGGREGATE_FILTER"); http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/tool/pom.xml ---------------------------------------------------------------------- diff --git a/tool/pom.xml b/tool/pom.xml index 855e0ca..9feff5c 100644 --- a/tool/pom.xml +++ b/tool/pom.xml @@ -38,6 +38,11 @@ <dependencies> <dependency> <groupId>org.apache.kylin</groupId> + <artifactId>kylin-core-metrics</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.kylin</groupId> <artifactId>kylin-storage-hbase</artifactId> </dependency> <dependency> http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeDescCreator.java ---------------------------------------------------------------------- diff --git a/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeDescCreator.java b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeDescCreator.java new file mode 100644 index 0000000..dad1629 --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeDescCreator.java @@ -0,0 +1,636 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.tool.metrics.systemcube; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.common.util.Pair; +import org.apache.kylin.cube.CubeDescManager; +import org.apache.kylin.cube.model.AggregationGroup; +import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.cube.model.DimensionDesc; +import org.apache.kylin.cube.model.HBaseColumnDesc; +import org.apache.kylin.cube.model.HBaseColumnFamilyDesc; +import org.apache.kylin.cube.model.HBaseMappingDesc; +import org.apache.kylin.cube.model.RowKeyColDesc; +import org.apache.kylin.cube.model.RowKeyDesc; +import org.apache.kylin.cube.model.SelectRule; +import org.apache.kylin.dimension.DictionaryDimEnc; +import org.apache.kylin.job.constant.JobStatusEnum; +import org.apache.kylin.measure.percentile.PercentileMeasureType; +import org.apache.kylin.metadata.model.FunctionDesc; +import org.apache.kylin.metadata.model.IEngineAware; +import org.apache.kylin.metadata.model.MeasureDesc; +import org.apache.kylin.metadata.model.ParameterDesc; +import org.apache.kylin.metrics.lib.SinkTool; +import org.apache.kylin.metrics.lib.impl.RecordEvent; 
+import org.apache.kylin.metrics.lib.impl.TimePropertyEnum; +import org.apache.kylin.metrics.property.JobPropertyEnum; +import org.apache.kylin.metrics.property.QueryCubePropertyEnum; +import org.apache.kylin.metrics.property.QueryPropertyEnum; +import org.apache.kylin.metrics.property.QueryRPCPropertyEnum; +import org.apache.kylin.tool.metrics.systemcube.util.HiveSinkTool; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +public class CubeDescCreator { + + public static void main(String[] args) throws Exception { + // KylinConfig.setSandboxEnvIfPossible(); + KylinConfig config = KylinConfig.getInstanceFromEnv(); + + CubeDesc kylinCubeDesc = generateKylinCubeDescForMetricsQuery(config, new HiveSinkTool()); + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + DataOutputStream dout = new DataOutputStream(buf); + CubeDescManager.CUBE_DESC_SERIALIZER.serialize(kylinCubeDesc, dout); + dout.close(); + buf.close(); + System.out.println(buf.toString()); + } + + public static CubeDesc generateKylinCubeDescForMetricsQuery(KylinConfig config, SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQuery()); + + //Set for dimensions + List<String> dimensions = ModelCreator.getDimensionsForMetricsQuery(); + dimensions.remove(TimePropertyEnum.DAY_TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + + List<DimensionDesc> dimensionDescList = Lists.newArrayListWithExpectedSize(dimensions.size()); + for (String dimensionName : dimensions) { + dimensionDescList.add(getDimensionDesc(tableName, dimensionName)); + } + + //Set for measures + List<String> measures = ModelCreator.getMeasuresForMetricsQuery(); + measures.remove(QueryPropertyEnum.ID_CODE.toString()); + List<MeasureDesc> measureDescList = Lists.newArrayListWithExpectedSize(measures.size() * 2 + 1 + 1); + + List<Pair<String, String>> measureTypeList = 
HiveTableCreator.getHiveColumnsForMetricsQuery(); + Map<String, String> measureTypeMap = Maps.newHashMapWithExpectedSize(measureTypeList.size()); + for (Pair<String, String> entry : measureTypeList) { + measureTypeMap.put(entry.getKey(), entry.getValue()); + } + measureDescList.add(getMeasureCount()); + measureDescList.add(getMeasureMin(QueryPropertyEnum.TIME_COST.toString(), + measureTypeMap.get(QueryPropertyEnum.TIME_COST.toString()))); + for (String measure : measures) { + measureDescList.add(getMeasureSum(measure, measureTypeMap.get(measure))); + measureDescList.add(getMeasureMax(measure, measureTypeMap.get(measure))); + } + measureDescList.add(getMeasureHLL(QueryPropertyEnum.ID_CODE.toString())); + measureDescList.add(getMeasurePercentile(QueryPropertyEnum.TIME_COST.toString())); + + //Set for row key + RowKeyColDesc[] rowKeyColDescs = new RowKeyColDesc[dimensionDescList.size()]; + int idx = getTimeRowKeyColDesc(rowKeyColDescs); + rowKeyColDescs[idx] = getRowKeyColDesc(QueryPropertyEnum.USER.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryPropertyEnum.PROJECT.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryPropertyEnum.REALIZATION.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryPropertyEnum.REALIZATION_TYPE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryPropertyEnum.EXCEPTION.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryPropertyEnum.TYPE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(RecordEvent.RecordReserveKeyEnum.HOST.toString(), idx + 1); + idx++; + + RowKeyDesc rowKeyDesc = new RowKeyDesc(); + rowKeyDesc.setRowkeyColumns(rowKeyColDescs); + + //Set for aggregation group + String[][] hierarchy_dims = new String[2][]; + hierarchy_dims[0] = getTimeHierarchy(); + hierarchy_dims[1] = new String[2]; + hierarchy_dims[1][0] = QueryPropertyEnum.REALIZATION_TYPE.toString(); + 
hierarchy_dims[1][1] = QueryPropertyEnum.REALIZATION.toString(); + + SelectRule selectRule = new SelectRule(); + selectRule.mandatoryDims = new String[0]; + selectRule.hierarchyDims = hierarchy_dims; + selectRule.jointDims = new String[0][0]; + + AggregationGroup aggGroup = new AggregationGroup(); + aggGroup.setIncludes(dimensions.toArray(new String[dimensions.size()])); + aggGroup.setSelectRule(selectRule); + + //Set for hbase mapping + HBaseMappingDesc hBaseMapping = new HBaseMappingDesc(); + hBaseMapping.setColumnFamily(getHBaseColumnFamily(measureDescList)); + + return generateKylinCubeDesc(tableName, sinkTool.getStorageType(), dimensionDescList, measureDescList, + rowKeyDesc, aggGroup, hBaseMapping, sinkTool.getCubeDescOverrideProperties()); + } + + public static CubeDesc generateKylinCubeDescForMetricsQueryCube(KylinConfig config, SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQueryCube()); + + //Set for dimensions + List<String> dimensions = ModelCreator.getDimensionsForMetricsQueryCube(); + dimensions.remove(TimePropertyEnum.DAY_TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.HOST.toString()); + dimensions.remove(QueryCubePropertyEnum.PROJECT.toString()); + + List<DimensionDesc> dimensionDescList = Lists.newArrayListWithExpectedSize(dimensions.size()); + for (String dimensionName : dimensions) { + dimensionDescList.add(getDimensionDesc(tableName, dimensionName)); + } + + //Set for measures + List<String> measures = ModelCreator.getMeasuresForMetricsQueryCube(); + List<MeasureDesc> measureDescList = Lists.newArrayListWithExpectedSize(measures.size() * 2); + + List<Pair<String, String>> measureTypeList = HiveTableCreator.getHiveColumnsForMetricsQueryCube(); + Map<String, String> measureTypeMap = Maps.newHashMapWithExpectedSize(measureTypeList.size()); + for (Pair<String, String> entry : measureTypeList) { + 
measureTypeMap.put(entry.getKey(), entry.getValue()); + } + measureDescList.add(getMeasureCount()); + for (String measure : measures) { + measureDescList.add(getMeasureSum(measure, measureTypeMap.get(measure))); + if (!measure.equals(QueryCubePropertyEnum.WEIGHT_PER_HIT.toString())) { + measureDescList.add(getMeasureMax(measure, measureTypeMap.get(measure))); + } + } + + //Set for row key + RowKeyColDesc[] rowKeyColDescs = new RowKeyColDesc[dimensionDescList.size()]; + int idx = getTimeRowKeyColDesc(rowKeyColDescs); + rowKeyColDescs[idx] = getRowKeyColDesc(QueryCubePropertyEnum.CUBE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryCubePropertyEnum.SEGMENT.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryCubePropertyEnum.CUBOID_SOURCE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryCubePropertyEnum.CUBOID_TARGET.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryCubePropertyEnum.IF_MATCH.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryCubePropertyEnum.IF_SUCCESS.toString(), idx + 1); + idx++; + + RowKeyDesc rowKeyDesc = new RowKeyDesc(); + rowKeyDesc.setRowkeyColumns(rowKeyColDescs); + + //Set for aggregation group + String[] mandatory_dims = new String[] { QueryCubePropertyEnum.CUBE.toString() }; + + String[][] hierarchy_dims = new String[1][]; + hierarchy_dims[0] = getTimeHierarchy(); + + String[][] joint_dims = new String[1][]; + joint_dims[0] = new String[] { QueryCubePropertyEnum.CUBOID_SOURCE.toString(), + QueryCubePropertyEnum.CUBOID_TARGET.toString() }; + + SelectRule selectRule = new SelectRule(); + selectRule.mandatoryDims = mandatory_dims; + selectRule.hierarchyDims = hierarchy_dims; + selectRule.jointDims = joint_dims; + + AggregationGroup aggGroup = new AggregationGroup(); + aggGroup.setIncludes(dimensions.toArray(new String[dimensions.size()])); + aggGroup.setSelectRule(selectRule); + + //Set for hbase mapping + 
HBaseMappingDesc hBaseMapping = new HBaseMappingDesc(); + hBaseMapping.setColumnFamily(getHBaseColumnFamily(measureDescList)); + + return generateKylinCubeDesc(tableName, sinkTool.getStorageType(), dimensionDescList, measureDescList, + rowKeyDesc, aggGroup, hBaseMapping, sinkTool.getCubeDescOverrideProperties()); + } + + public static CubeDesc generateKylinCubeDescForMetricsQueryRPC(KylinConfig config, SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQueryRpcCall()); + + //Set for dimensions + List<String> dimensions = ModelCreator.getDimensionsForMetricsQueryRPC(); + dimensions.remove(TimePropertyEnum.DAY_TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + + List<DimensionDesc> dimensionDescList = Lists.newArrayListWithExpectedSize(dimensions.size()); + for (String dimensionName : dimensions) { + dimensionDescList.add(getDimensionDesc(tableName, dimensionName)); + } + + //Set for measures + List<String> measures = ModelCreator.getMeasuresForMetricsQueryRPC(); + List<MeasureDesc> measureDescList = Lists.newArrayListWithExpectedSize(measures.size() * 2 + 1 + 1); + + List<Pair<String, String>> measureTypeList = HiveTableCreator.getHiveColumnsForMetricsQueryRPC(); + Map<String, String> measureTypeMap = Maps.newHashMapWithExpectedSize(measureTypeList.size()); + for (Pair<String, String> entry : measureTypeList) { + measureTypeMap.put(entry.getKey(), entry.getValue()); + } + measureDescList.add(getMeasureCount()); + for (String measure : measures) { + measureDescList.add(getMeasureSum(measure, measureTypeMap.get(measure))); + measureDescList.add(getMeasureMax(measure, measureTypeMap.get(measure))); + } + measureDescList.add(getMeasurePercentile(QueryRPCPropertyEnum.CALL_TIME.toString())); + + //Set for row key + RowKeyColDesc[] rowKeyColDescs = new RowKeyColDesc[dimensionDescList.size()]; + int idx = getTimeRowKeyColDesc(rowKeyColDescs); + rowKeyColDescs[idx] = 
getRowKeyColDesc(QueryRPCPropertyEnum.PROJECT.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryRPCPropertyEnum.REALIZATION.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryRPCPropertyEnum.RPC_SERVER.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(RecordEvent.RecordReserveKeyEnum.HOST.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(QueryRPCPropertyEnum.EXCEPTION.toString(), idx + 1); + idx++; + + RowKeyDesc rowKeyDesc = new RowKeyDesc(); + rowKeyDesc.setRowkeyColumns(rowKeyColDescs); + + //Set for aggregation group + String[][] hierarchy_dims = new String[1][]; + hierarchy_dims[0] = getTimeHierarchy(); + + SelectRule selectRule = new SelectRule(); + selectRule.mandatoryDims = new String[0]; + selectRule.hierarchyDims = hierarchy_dims; + selectRule.jointDims = new String[0][0]; + + AggregationGroup aggGroup = new AggregationGroup(); + aggGroup.setIncludes(dimensions.toArray(new String[dimensions.size()])); + aggGroup.setSelectRule(selectRule); + + //Set for hbase mapping + HBaseMappingDesc hBaseMapping = new HBaseMappingDesc(); + hBaseMapping.setColumnFamily(getHBaseColumnFamily(measureDescList)); + + return generateKylinCubeDesc(tableName, sinkTool.getStorageType(), dimensionDescList, measureDescList, + rowKeyDesc, aggGroup, hBaseMapping, sinkTool.getCubeDescOverrideProperties()); + } + + public static CubeDesc generateKylinCubeDescForMetricsJob(KylinConfig config, SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectJob()); + + //Set for dimensions + List<String> dimensions = ModelCreator.getDimensionsForMetricsJob(); + dimensions.remove(TimePropertyEnum.DAY_TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.HOST.toString()); + + List<DimensionDesc> dimensionDescList = Lists.newArrayListWithExpectedSize(dimensions.size()); + 
for (String dimensionName : dimensions) { + dimensionDescList.add(getDimensionDesc(tableName, dimensionName)); + } + + //Set for measures + List<String> measures = ModelCreator.getMeasuresForMetricsJob(); + List<MeasureDesc> measureDescList = Lists.newArrayListWithExpectedSize((measures.size() - 4) * 3 + 1 + 1 + 4); + + Set<String> stepDuration = Sets.newHashSet(); + stepDuration.add(JobPropertyEnum.STEP_DURATION_DISTINCT_COLUMNS.toString()); + stepDuration.add(JobPropertyEnum.STEP_DURATION_DICTIONARY.toString()); + stepDuration.add(JobPropertyEnum.STEP_DURATION_INMEM_CUBING.toString()); + stepDuration.add(JobPropertyEnum.STEP_DURATION_HFILE_CONVERT.toString()); + + List<Pair<String, String>> measureTypeList = HiveTableCreator.getHiveColumnsForMetricsJob(); + Map<String, String> measureTypeMap = Maps.newHashMapWithExpectedSize(measureTypeList.size()); + for (Pair<String, String> entry : measureTypeList) { + measureTypeMap.put(entry.getKey(), entry.getValue()); + } + measureDescList.add(getMeasureCount()); + for (String measure : measures) { + measureDescList.add(getMeasureSum(measure, measureTypeMap.get(measure))); + measureDescList.add(getMeasureMax(measure, measureTypeMap.get(measure))); + if (!stepDuration.contains(measure)) { + measureDescList.add(getMeasureMin(measure, measureTypeMap.get(measure))); + } + } + measureDescList.add(getMeasurePercentile(JobPropertyEnum.BUILD_DURATION.toString())); + + //Set for row key + RowKeyColDesc[] rowKeyColDescs = new RowKeyColDesc[dimensionDescList.size()]; + int idx = getTimeRowKeyColDesc(rowKeyColDescs); + rowKeyColDescs[idx] = getRowKeyColDesc(JobPropertyEnum.USER.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(JobPropertyEnum.PROJECT.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(JobPropertyEnum.CUBE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(JobPropertyEnum.ALGORITHM.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = 
getRowKeyColDesc(JobPropertyEnum.TYPE.toString(), idx + 1); + idx++; + + RowKeyDesc rowKeyDesc = new RowKeyDesc(); + rowKeyDesc.setRowkeyColumns(rowKeyColDescs); + + //Set for aggregation group + String[][] hierarchy_dims = new String[1][]; + hierarchy_dims[0] = getTimeHierarchy(); + + SelectRule selectRule = new SelectRule(); + selectRule.mandatoryDims = new String[0]; + selectRule.hierarchyDims = hierarchy_dims; + selectRule.jointDims = new String[0][0]; + + AggregationGroup aggGroup = new AggregationGroup(); + aggGroup.setIncludes(dimensions.toArray(new String[dimensions.size()])); + aggGroup.setSelectRule(selectRule); + + //Set for hbase mapping + HBaseMappingDesc hBaseMapping = new HBaseMappingDesc(); + hBaseMapping.setColumnFamily(getHBaseColumnFamily(measureDescList)); + + return generateKylinCubeDesc(tableName, sinkTool.getStorageType(), dimensionDescList, measureDescList, + rowKeyDesc, aggGroup, hBaseMapping, sinkTool.getCubeDescOverrideProperties()); + } + + public static CubeDesc generateKylinCubeDescForMetricsJobException(KylinConfig config, SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectJobException()); + + //Set for dimensions + List<String> dimensions = ModelCreator.getDimensionsForMetricsJobException(); + dimensions.remove(TimePropertyEnum.DAY_TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.HOST.toString()); + + List<DimensionDesc> dimensionDescList = Lists.newArrayListWithExpectedSize(dimensions.size()); + for (String dimensionName : dimensions) { + dimensionDescList.add(getDimensionDesc(tableName, dimensionName)); + } + + //Set for measures + List<String> measures = ModelCreator.getMeasuresForMetricsJobException(); + measures.remove(JobPropertyEnum.ID_CODE.toString()); + List<MeasureDesc> measureDescList = Lists.newArrayListWithExpectedSize(1); + + measureDescList.add(getMeasureCount()); + + //Set 
for row key
+        RowKeyColDesc[] rowKeyColDescs = new RowKeyColDesc[dimensionDescList.size()];
+        int idx = getTimeRowKeyColDesc(rowKeyColDescs);
+        rowKeyColDescs[idx] = getRowKeyColDesc(JobPropertyEnum.USER.toString(), idx + 1);
+        idx++;
+        rowKeyColDescs[idx] = getRowKeyColDesc(JobPropertyEnum.PROJECT.toString(), idx + 1);
+        idx++;
+        rowKeyColDescs[idx] = getRowKeyColDesc(JobPropertyEnum.CUBE.toString(), idx + 1);
+        idx++;
+        rowKeyColDescs[idx] = getRowKeyColDesc(JobPropertyEnum.ALGORITHM.toString(), idx + 1);
+        idx++;
+        rowKeyColDescs[idx] = getRowKeyColDesc(JobPropertyEnum.TYPE.toString(), idx + 1);
+        idx++;
+        rowKeyColDescs[idx] = getRowKeyColDesc(JobPropertyEnum.EXCEPTION.toString(), idx + 1);
+        idx++;
+
+        RowKeyDesc rowKeyDesc = new RowKeyDesc();
+        rowKeyDesc.setRowkeyColumns(rowKeyColDescs);
+
+        //Set for aggregation group
+        String[][] hierarchy_dims = new String[1][];
+        hierarchy_dims[0] = getTimeHierarchy();
+
+        SelectRule selectRule = new SelectRule();
+        selectRule.mandatoryDims = new String[0];
+        selectRule.hierarchyDims = hierarchy_dims;
+        selectRule.jointDims = new String[0][0];
+
+        AggregationGroup aggGroup = new AggregationGroup();
+        aggGroup.setIncludes(dimensions.toArray(new String[dimensions.size()]));
+        aggGroup.setSelectRule(selectRule);
+
+        //Set for hbase mapping
+        HBaseMappingDesc hBaseMapping = new HBaseMappingDesc();
+        hBaseMapping.setColumnFamily(getHBaseColumnFamily(measureDescList));
+
+        return generateKylinCubeDesc(tableName, sinkTool.getStorageType(), dimensionDescList, measureDescList,
+                rowKeyDesc, aggGroup, hBaseMapping, sinkTool.getCubeDescOverrideProperties());
+    }
+
+    /**
+     * Assembles a {@link CubeDesc} from already prepared parts.
+     *
+     * The cube name and the model name are both {@code tableName} with every '.' replaced by '_'.
+     * The description is empty, the last-modified stamp is 0, the engine type is
+     * {@code IEngineAware.ID_MR_V2}, job status notification is requested for {@code ERROR} only,
+     * and a random UUID is assigned last.
+     *
+     * NOTE(review): the signature is calculated before the notify list, auto-merge ranges, engine type,
+     * storage type, aggregation groups and override properties are set -- confirm that
+     * {@code calculateSignature()} does not depend on any of them.
+     *
+     * @param tableName          sink table name the cube is derived from
+     * @param storageType        storage type id stored on the descriptor
+     * @param dimensionDescList  dimensions of the cube
+     * @param measureDescList    measures of the cube
+     * @param rowKeyDesc         row key layout
+     * @param aggGroup           the single aggregation group of the cube
+     * @param hBaseMapping       column family mapping for the measures
+     * @param overrideProperties entries copied into the descriptor's override properties; must not be null
+     * @return the populated cube descriptor
+     */
+    public static CubeDesc generateKylinCubeDesc(String tableName, int storageType,
+            List<DimensionDesc> dimensionDescList, List<MeasureDesc> measureDescList, RowKeyDesc rowKeyDesc,
+            AggregationGroup aggGroup, HBaseMappingDesc hBaseMapping, Map<String, String> overrideProperties) {
+        CubeDesc desc = new CubeDesc();
+        desc.setName(tableName.replace('.', '_'));
+        desc.setModelName(tableName.replace('.', '_'));
+        desc.setDescription("");
+        desc.setLastModified(0L);
+        desc.setDimensions(dimensionDescList);
+        desc.setMeasures(measureDescList);
+        desc.setRowkey(rowKeyDesc);
+        desc.setHbaseMapping(hBaseMapping);
+        desc.setSignature(desc.calculateSignature());
+        desc.setNotifyList(Lists.<String> newArrayList());
+        desc.setStatusNeedNotify(Lists.newArrayList(JobStatusEnum.ERROR.toString()));
+        // presumably milliseconds: 1 day, 7 days and 28 days -- confirm against CubeDesc
+        desc.setAutoMergeTimeRanges(new long[] { 86400000L, 604800000L, 2419200000L });
+        desc.setEngineType(IEngineAware.ID_MR_V2);
+        desc.setStorageType(storageType);
+        desc.setAggregationGroups(Lists.newArrayList(aggGroup));
+        desc.getOverrideKylinProps().putAll(overrideProperties);
+        desc.updateRandomUuid();
+        return desc;
+    }
+
+    /**
+     * Distributes the measures over HBase column families.
+     *
+     * Measures whose function is count-distinct or whose expression equals
+     * {@code PercentileMeasureType.FUNC_PERCENTILE} are treated as "large" and each gets a column family
+     * of its own. All remaining measures share one column family, which is emitted first and only if
+     * there is at least one such measure. Families are named "F1", "F2", ... in emission order and each
+     * holds a single column with qualifier "M".
+     *
+     * @param measureDescList measures to map; each must have a function with a non-null expression
+     * @return the column families, possibly empty
+     */
+    public static HBaseColumnFamilyDesc[] getHBaseColumnFamily(List<MeasureDesc> measureDescList) {
+        List<String> normalMeasureList = Lists.newLinkedList();
+        List<String> largeMeasureList = Lists.newLinkedList();
+        for (MeasureDesc measureDesc : measureDescList) {
+            if (measureDesc.getFunction().isCountDistinct()
+                    || measureDesc.getFunction().getExpression().equals(PercentileMeasureType.FUNC_PERCENTILE)) {
+                largeMeasureList.add(measureDesc.getName());
+            } else {
+                normalMeasureList.add(measureDesc.getName());
+            }
+        }
+        List<HBaseColumnFamilyDesc> columnFamilyDescList = Lists.newLinkedList();
+        // running suffix for the family names, shared by the normal and the large families
+        int idx = 1;
+        if (normalMeasureList.size() > 0) {
+            HBaseColumnDesc columnDesc = new HBaseColumnDesc();
+            columnDesc.setQualifier("M");
+            columnDesc.setMeasureRefs(normalMeasureList.toArray(new String[normalMeasureList.size()]));
+            HBaseColumnFamilyDesc columnFamilyDesc = new HBaseColumnFamilyDesc();
+            columnFamilyDesc.setName("F" + idx++);
+            columnFamilyDesc.setColumns(new HBaseColumnDesc[] { columnDesc });
+
+            columnFamilyDescList.add(columnFamilyDesc);
+        }
+        for (String largeMeasure : largeMeasureList) {
+            HBaseColumnDesc columnDesc = new HBaseColumnDesc();
+            columnDesc.setQualifier("M");
+            columnDesc.setMeasureRefs(new String[] { largeMeasure });
+            HBaseColumnFamilyDesc columnFamilyDesc = new HBaseColumnFamilyDesc();
+            columnFamilyDesc.setName("F" + idx++);
+            columnFamilyDesc.setColumns(new HBaseColumnDesc[] { columnDesc });
+
+            columnFamilyDescList.add(columnFamilyDesc);
+        }
+
+        return columnFamilyDescList.toArray(new HBaseColumnFamilyDesc[columnFamilyDescList.size()]);
+    }
+
+    /**
+     * Returns the time columns used as a hierarchy, from coarsest to finest:
+     * YEAR, MONTH, WEEK_BEGIN_DATE, DAY_DATE.
+     */
+    public static String[] getTimeHierarchy() {
+        String[] result = new String[4];
+        result[0] = TimePropertyEnum.YEAR.toString();
+        result[1] = TimePropertyEnum.MONTH.toString();
+        result[2] = TimePropertyEnum.WEEK_BEGIN_DATE.toString();
+        result[3] = TimePropertyEnum.DAY_DATE.toString();
+        return result;
+    }
+
+    /**
+     * Writes the row key columns for the time dimensions into slots 0 to 5 of the given array, in the
+     * order DAY_DATE, WEEK_BEGIN_DATE, MONTH, YEAR, TIME_HOUR, TIME_MINUTE, with 1-based ids.
+     *
+     * @param rowKeyColDescs target array; must have room for at least six entries
+     * @return the next free slot in the array (6)
+     */
+    public static int getTimeRowKeyColDesc(RowKeyColDesc[] rowKeyColDescs) {
+        int idx = 0;
+        rowKeyColDescs[idx] = getRowKeyColDesc(TimePropertyEnum.DAY_DATE.toString(), idx + 1);
+        idx++;
+        rowKeyColDescs[idx] = getRowKeyColDesc(TimePropertyEnum.WEEK_BEGIN_DATE.toString(), idx + 1);
+        idx++;
+        rowKeyColDescs[idx] = getRowKeyColDesc(TimePropertyEnum.MONTH.toString(), idx + 1);
+        idx++;
+        rowKeyColDescs[idx] = getRowKeyColDesc(TimePropertyEnum.YEAR.toString(), idx + 1);
+        idx++;
+        rowKeyColDescs[idx] = getRowKeyColDesc(TimePropertyEnum.TIME_HOUR.toString(), idx + 1);
+        idx++;
+        rowKeyColDescs[idx] = getRowKeyColDesc(TimePropertyEnum.TIME_MINUTE.toString(), idx + 1);
+        idx++;
+        return idx;
+    }
+
+    /**
+     * Creates a row key column that uses {@code DictionaryDimEnc.ENCODING_NAME} as encoding and is not a
+     * shard-by column.
+     *
+     * @param column column name
+     * @param id     value stored, as a decimal string, in the descriptor's index field
+     */
+    public static RowKeyColDesc getRowKeyColDesc(String column, int id) {
+        RowKeyColDesc rowKeyColDesc = new RowKeyColDesc();
+        rowKeyColDesc.setIndex(Integer.toString(id));
+        rowKeyColDesc.setColumn(column);
+        rowKeyColDesc.setEncoding(DictionaryDimEnc.ENCODING_NAME);
+        rowKeyColDesc.setShardBy(false);
+        return rowKeyColDesc;
+    }
+
+    /**
+     * Creates a dimension named {@code tableName + "." + dimension} that refers to column
+     * {@code dimension} of table {@code tableName}.
+     */
+    public static DimensionDesc getDimensionDesc(String tableName, String dimension) {
+        DimensionDesc dimensionDesc = new DimensionDesc();
+        dimensionDesc.setName(tableName + "." + dimension);
+        dimensionDesc.setTable(tableName);
+        dimensionDesc.setColumn(dimension);
+        return dimensionDesc;
+    }
+
+    /**
+     * Creates the measure "_COUNT_": a COUNT over the constant "1" whose return type is the string form
+     * of {@code HiveTypeEnum.HBIGINT}.
+     */
+    public static MeasureDesc getMeasureCount() {
+        ParameterDesc parameterDesc = new ParameterDesc();
+        parameterDesc.setValue("1");
+        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_CONSTANT);
+
+        FunctionDesc function = new FunctionDesc();
+        function.setExpression(FunctionDesc.FUNC_COUNT);
+        function.setParameter(parameterDesc);
+        function.setReturnType(HiveTableCreator.HiveTypeEnum.HBIGINT.toString());
+
+        MeasureDesc result = new MeasureDesc();
+        result.setName("_COUNT_");
+        result.setFunction(function);
+        return result;
+    }
+
+    /**
+     * Creates the measure {@code column + "_SUM"}: a SUM over the given column.
+     *
+     * @param column   column to aggregate
+     * @param dataType data type of the column; must not be null. When it equals the string form of
+     *                 {@code HiveTypeEnum.HDOUBLE} the return type becomes the string form of
+     *                 {@code HiveTypeEnum.HDECIMAL}, otherwise {@code dataType} is used unchanged
+     */
+    public static MeasureDesc getMeasureSum(String column, String dataType) {
+        ParameterDesc parameterDesc = new ParameterDesc();
+        parameterDesc.setValue(column);
+        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_COLUMN);
+
+        FunctionDesc function = new FunctionDesc();
+        function.setExpression(FunctionDesc.FUNC_SUM);
+        function.setParameter(parameterDesc);
+        function.setReturnType(dataType.equals(HiveTableCreator.HiveTypeEnum.HDOUBLE.toString())
+                ? HiveTableCreator.HiveTypeEnum.HDECIMAL.toString()
+                : dataType);
+
+        MeasureDesc result = new MeasureDesc();
+        result.setName(column + "_SUM");
+        result.setFunction(function);
+        return result;
+    }
+
+    /**
+     * Creates the measure {@code column + "_MAX"}: a MAX over the given column with {@code dataType} as
+     * return type.
+     */
+    public static MeasureDesc getMeasureMax(String column, String dataType) {
+        ParameterDesc parameterDesc = new ParameterDesc();
+        parameterDesc.setValue(column);
+        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_COLUMN);
+
+        FunctionDesc function = new FunctionDesc();
+        function.setExpression(FunctionDesc.FUNC_MAX);
+        function.setParameter(parameterDesc);
+        function.setReturnType(dataType);
+
+        MeasureDesc result = new MeasureDesc();
+        result.setName(column + "_MAX");
+        result.setFunction(function);
+        return result;
+    }
+
+    /**
+     * Creates the measure {@code column + "_MIN"}: a MIN over the given column with {@code dataType} as
+     * return type.
+     */
+    public static MeasureDesc getMeasureMin(String column, String dataType) {
+        ParameterDesc parameterDesc = new ParameterDesc();
+        parameterDesc.setValue(column);
+        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_COLUMN);
+
+        FunctionDesc function = new FunctionDesc();
+        function.setExpression(FunctionDesc.FUNC_MIN);
+        function.setParameter(parameterDesc);
+        function.setReturnType(dataType);
+
+        MeasureDesc result = new MeasureDesc();
+        result.setName(column + "_MIN");
+        result.setFunction(function);
+        return result;
+    }
+
+    /**
+     * Creates the measure {@code column + "_HLL"}: a count-distinct over the given column with the
+     * fixed return type "hllc12".
+     */
+    public static MeasureDesc getMeasureHLL(String column) {
+        ParameterDesc parameterDesc = new ParameterDesc();
+        parameterDesc.setValue(column);
+        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_COLUMN);
+
+        FunctionDesc function = new FunctionDesc();
+        function.setExpression(FunctionDesc.FUNC_COUNT_DISTINCT);
+        function.setParameter(parameterDesc);
+        function.setReturnType("hllc12");
+
+        MeasureDesc result = new MeasureDesc();
+        result.setName(column + "_HLL");
+        result.setFunction(function);
+        return result;
+    }
+
+    /**
+     * Creates the measure {@code column + "_PERCENTILE"}: a percentile function over the given column
+     * with {@code PercentileMeasureType.DATATYPE_PERCENTILE} as return type.
+     */
+    public static MeasureDesc getMeasurePercentile(String column) {
+        ParameterDesc parameterDesc = new ParameterDesc();
+        parameterDesc.setValue(column);
+        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_COLUMN);
+
+        FunctionDesc function = new FunctionDesc();
+        function.setExpression(PercentileMeasureType.FUNC_PERCENTILE);
+        function.setParameter(parameterDesc);
+        function.setReturnType(PercentileMeasureType.DATATYPE_PERCENTILE);
+
+        MeasureDesc result = new MeasureDesc();
+        result.setName(column + "_PERCENTILE");
+        result.setFunction(function);
+        return result;
+    }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeInstanceCreator.java
----------------------------------------------------------------------
diff --git a/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeInstanceCreator.java b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeInstanceCreator.java
new file mode 100644
index 0000000..c1672c0
--- /dev/null
+++ b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeInstanceCreator.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+*/ + +package org.apache.kylin.tool.metrics.systemcube; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; + +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.cube.CubeInstance; +import org.apache.kylin.cube.CubeManager; +import org.apache.kylin.cube.CubeSegment; +import org.apache.kylin.metadata.model.Segments; +import org.apache.kylin.metadata.realization.RealizationStatusEnum; +import org.apache.kylin.metrics.lib.SinkTool; +import org.apache.kylin.tool.metrics.systemcube.util.HiveSinkTool; + +public class CubeInstanceCreator { + + public static void main(String[] args) throws Exception { + // KylinConfig.setSandboxEnvIfPossible(); + KylinConfig config = KylinConfig.getInstanceFromEnv(); + + CubeInstance cubeInstance = generateKylinCubeInstanceForMetricsQuery("ADMIN", config, new HiveSinkTool()); + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + DataOutputStream dout = new DataOutputStream(buf); + CubeManager.CUBE_SERIALIZER.serialize(cubeInstance, dout); + dout.close(); + buf.close(); + System.out.println(buf.toString()); + } + + public static CubeInstance generateKylinCubeInstanceForMetricsQuery(String owner, KylinConfig config, + SinkTool sinkTool) { + return generateKylinCubeInstance(owner, sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQuery())); + } + + public static CubeInstance generateKylinCubeInstanceForMetricsQueryCube(String owner, KylinConfig config, + SinkTool sinkTool) { + return generateKylinCubeInstance(owner, + sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQueryCube())); + } + + public static CubeInstance generateKylinCubeInstanceForMetricsQueryRPC(String owner, KylinConfig config, + SinkTool sinkTool) { + return generateKylinCubeInstance(owner, + sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQueryRpcCall())); + } + + public static CubeInstance generateKylinCubeInstanceForMetricsJob(String owner, KylinConfig config, + SinkTool sinkTool) { + return 
generateKylinCubeInstance(owner, sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectJob())); + } + + public static CubeInstance generateKylinCubeInstanceForMetricsJobException(String owner, KylinConfig config, + SinkTool sinkTool) { + return generateKylinCubeInstance(owner, + sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectJobException())); + } + + public static CubeInstance generateKylinCubeInstance(String owner, String tableName) { + CubeInstance cubeInstance = new CubeInstance(); + cubeInstance.setName(tableName.replace('.', '_')); + cubeInstance.setSegments(new Segments<CubeSegment>()); + cubeInstance.setDescName(tableName.replace('.', '_')); + cubeInstance.setStatus(RealizationStatusEnum.DISABLED); + cubeInstance.setOwner(owner); + cubeInstance.setCreateTimeUTC(0L); + cubeInstance.updateRandomUuid(); + + return cubeInstance; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/HiveTableCreator.java ---------------------------------------------------------------------- diff --git a/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/HiveTableCreator.java b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/HiveTableCreator.java new file mode 100644 index 0000000..35b296a --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/HiveTableCreator.java @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.tool.metrics.systemcube; + +import java.util.List; + +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.common.util.Pair; +import org.apache.kylin.metrics.lib.ActiveReservoirReporter; +import org.apache.kylin.metrics.lib.impl.RecordEvent; +import org.apache.kylin.metrics.lib.impl.TimePropertyEnum; +import org.apache.kylin.metrics.lib.impl.hive.HiveProducerRecord; +import org.apache.kylin.metrics.lib.impl.hive.HiveReservoirReporter; +import org.apache.kylin.metrics.property.JobPropertyEnum; +import org.apache.kylin.metrics.property.QueryCubePropertyEnum; +import org.apache.kylin.metrics.property.QueryPropertyEnum; +import org.apache.kylin.metrics.property.QueryRPCPropertyEnum; + +import com.google.common.base.Strings; +import com.google.common.collect.Lists; + +public class HiveTableCreator { + + public static void main(String[] args) { + // KylinConfig.setSandboxEnvIfPossible(); + KylinConfig config = KylinConfig.getInstanceFromEnv(); + + System.out.println(generateAllSQL(config)); + } + + public static String generateAllSQL(KylinConfig config) { + StringBuilder sb = new StringBuilder(); + sb.append(generateDatabaseSQL()); + sb.append("\n"); + sb.append(generateHiveTableSQLForMetricsQuery(config)); + sb.append("\n"); + sb.append(generateHiveTableSQLForMetricsQueryCUBE(config)); + sb.append("\n"); + sb.append(generateHiveTableSQLForMetricsQueryRPC(config)); + sb.append("\n"); + sb.append(generateHiveTableSQLForMetricsJob(config)); + sb.append("\n"); + 
sb.append(generateHiveTableSQLForMetricsJobException(config)); + + return sb.toString(); + } + + public static String generateDatabaseSQL() { + return "CREATE DATABASE IF NOT EXISTS " + ActiveReservoirReporter.KYLIN_PREFIX + ";\n"; + } + + public static String generateHiveTableSQL(String tableName, List<Pair<String, String>> columns, + List<Pair<String, String>> partitionKVs) { + StringBuilder sb = new StringBuilder(); + sb.append("DROP TABLE IF EXISTS " + tableName + ";\n"); + sb.append("\n"); + sb.append("CREATE TABLE " + tableName + "\n"); + sb.append("(\n"); + for (int i = 0; i < columns.size(); i++) { + if (i > 0) { + sb.append(","); + } + Pair<String, String> column = columns.get(i); + sb.append(column.getFirst() + " " + column.getSecond() + "\n"); + } + sb.append(")\n"); + if (partitionKVs != null && partitionKVs.size() > 0) { + sb.append("PARTITIONED BY("); + for (int i = 0; i < partitionKVs.size(); i++) { + if (i > 0) { + sb.append(","); + } + Pair<String, String> partitionKV = partitionKVs.get(i); + sb.append(partitionKV.getFirst() + " " + partitionKV.getSecond()); + } + sb.append(")\n"); + } + sb.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '" + HiveProducerRecord.DELIMITER + "'\n"); + sb.append("STORED AS TEXTFILE;\n"); + return sb.toString(); + } + + public static String generateHiveTableSQLForMetricsQuery(KylinConfig config) { + String tableName = HiveReservoirReporter.getTableFromSubject(config.getKylinMetricsSubjectQuery()); + return generateHiveTableSQL(tableName, getHiveColumnsForMetricsQuery(), getPartitionKVsForHiveTable()); + } + + public static String generateHiveTableSQLForMetricsQueryCUBE(KylinConfig config) { + String tableName = HiveReservoirReporter.getTableFromSubject(config.getKylinMetricsSubjectQueryCube()); + return generateHiveTableSQL(tableName, getHiveColumnsForMetricsQueryCube(), getPartitionKVsForHiveTable()); + } + + public static String generateHiveTableSQLForMetricsQueryRPC(KylinConfig config) { + String tableName = 
HiveReservoirReporter.getTableFromSubject(config.getKylinMetricsSubjectQueryRpcCall()); + return generateHiveTableSQL(tableName, getHiveColumnsForMetricsQueryRPC(), getPartitionKVsForHiveTable()); + } + + public static String generateHiveTableSQLForMetricsJob(KylinConfig config) { + String tableName = HiveReservoirReporter.getTableFromSubject(config.getKylinMetricsSubjectJob()); + return generateHiveTableSQL(tableName, getHiveColumnsForMetricsJob(), getPartitionKVsForHiveTable()); + } + + public static String generateHiveTableSQLForMetricsJobException(KylinConfig config) { + String tableName = HiveReservoirReporter.getTableFromSubject(config.getKylinMetricsSubjectJobException()); + return generateHiveTableSQL(tableName, getHiveColumnsForMetricsJobException(), getPartitionKVsForHiveTable()); + } + + public static List<Pair<String, String>> getHiveColumnsForMetricsQuery() { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.add(new Pair<>(QueryPropertyEnum.ID_CODE.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.HOST.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryPropertyEnum.USER.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryPropertyEnum.PROJECT.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryPropertyEnum.REALIZATION.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryPropertyEnum.REALIZATION_TYPE.toString(), HiveTypeEnum.HINT.toString())); + columns.add(new Pair<>(QueryPropertyEnum.TYPE.toString(), HiveTypeEnum.HSTRING.toString())); + + columns.add(new Pair<>(QueryPropertyEnum.EXCEPTION.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryPropertyEnum.TIME_COST.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryPropertyEnum.CALCITE_RETURN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new 
Pair<>(QueryPropertyEnum.STORAGE_RETURN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryPropertyEnum.AGGR_FILTER_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + + columns.addAll(getTimeColumnsForMetrics()); + return columns; + } + + public static List<Pair<String, String>> getHiveColumnsForMetricsQueryCube() { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.HOST.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.PROJECT.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.CUBE.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.SEGMENT.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.CUBOID_SOURCE.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.CUBOID_TARGET.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.IF_MATCH.toString(), HiveTypeEnum.HBOOLEAN.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.FILTER_MASK.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.IF_SUCCESS.toString(), HiveTypeEnum.HBOOLEAN.toString())); + + columns.add(new Pair<>(QueryCubePropertyEnum.WEIGHT_PER_HIT.toString(), HiveTypeEnum.HDOUBLE.toString())); + + columns.add(new Pair<>(QueryCubePropertyEnum.CALL_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.TIME_SUM.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.TIME_MAX.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.SKIP_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.SCAN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + 
columns.add(new Pair<>(QueryCubePropertyEnum.RETURN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.AGGR_FILTER_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryCubePropertyEnum.AGGR_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + + columns.addAll(getTimeColumnsForMetrics()); + return columns; + } + + public static List<Pair<String, String>> getHiveColumnsForMetricsQueryRPC() { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.HOST.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryRPCPropertyEnum.PROJECT.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryRPCPropertyEnum.REALIZATION.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryRPCPropertyEnum.RPC_SERVER.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(QueryRPCPropertyEnum.EXCEPTION.toString(), HiveTypeEnum.HSTRING.toString())); + + columns.add(new Pair<>(QueryRPCPropertyEnum.CALL_TIME.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryRPCPropertyEnum.RETURN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryRPCPropertyEnum.SCAN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryRPCPropertyEnum.SKIP_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryRPCPropertyEnum.AGGR_FILTER_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(QueryRPCPropertyEnum.AGGR_COUNT.toString(), HiveTypeEnum.HBIGINT.toString())); + + columns.addAll(getTimeColumnsForMetrics()); + return columns; + } + + public static List<Pair<String, String>> getHiveColumnsForMetricsJob() { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.add(new Pair<>(JobPropertyEnum.ID_CODE.toString(), HiveTypeEnum.HSTRING.toString())); + 
columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.HOST.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(JobPropertyEnum.USER.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(JobPropertyEnum.PROJECT.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(JobPropertyEnum.CUBE.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(JobPropertyEnum.TYPE.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(JobPropertyEnum.ALGORITHM.toString(), HiveTypeEnum.HSTRING.toString())); + + columns.add(new Pair<>(JobPropertyEnum.BUILD_DURATION.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(JobPropertyEnum.SOURCE_SIZE.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(JobPropertyEnum.CUBE_SIZE.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(JobPropertyEnum.PER_BYTES_TIME_COST.toString(), HiveTypeEnum.HDOUBLE.toString())); + columns.add(new Pair<>(JobPropertyEnum.WAIT_RESOURCE_TIME.toString(), HiveTypeEnum.HBIGINT.toString())); + + columns.add( + new Pair<>(JobPropertyEnum.STEP_DURATION_DISTINCT_COLUMNS.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(JobPropertyEnum.STEP_DURATION_DICTIONARY.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(JobPropertyEnum.STEP_DURATION_INMEM_CUBING.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add( + new Pair<>(JobPropertyEnum.STEP_DURATION_HFILE_CONVERT.toString(), HiveTypeEnum.HBIGINT.toString())); + + columns.addAll(getTimeColumnsForMetrics()); + return columns; + } + + public static List<Pair<String, String>> getHiveColumnsForMetricsJobException() { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.add(new Pair<>(JobPropertyEnum.ID_CODE.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.HOST.toString(), HiveTypeEnum.HSTRING.toString())); + 
columns.add(new Pair<>(JobPropertyEnum.USER.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(JobPropertyEnum.PROJECT.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(JobPropertyEnum.CUBE.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(JobPropertyEnum.TYPE.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(JobPropertyEnum.ALGORITHM.toString(), HiveTypeEnum.HSTRING.toString())); + + columns.add(new Pair<>(JobPropertyEnum.EXCEPTION.toString(), HiveTypeEnum.HSTRING.toString())); + + columns.addAll(getTimeColumnsForMetrics()); + return columns; + } + + public static List<Pair<String, String>> getPartitionKVsForHiveTable() { + List<Pair<String, String>> partitionKVs = Lists.newLinkedList(); + partitionKVs.add(new Pair<>(TimePropertyEnum.DAY_DATE.toString(), HiveTypeEnum.HSTRING.toString())); + return partitionKVs; + } + + public static List<Pair<String, String>> getTimeColumnsForMetrics() { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.TIME.toString(), HiveTypeEnum.HBIGINT.toString())); + columns.add(new Pair<>(TimePropertyEnum.YEAR.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(TimePropertyEnum.MONTH.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(TimePropertyEnum.WEEK_BEGIN_DATE.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(TimePropertyEnum.DAY_TIME.toString(), HiveTypeEnum.HSTRING.toString())); + columns.add(new Pair<>(TimePropertyEnum.TIME_HOUR.toString(), HiveTypeEnum.HINT.toString())); + columns.add(new Pair<>(TimePropertyEnum.TIME_MINUTE.toString(), HiveTypeEnum.HINT.toString())); + columns.add(new Pair<>(TimePropertyEnum.TIME_SECOND.toString(), HiveTypeEnum.HINT.toString())); + + return columns; + } + + enum HiveTypeEnum { + HBOOLEAN("boolean"), HINT("int"), HBIGINT("bigint"), HDOUBLE("double"), HSTRING("string"), 
HDECIMAL("decimal"); + + private final String typeName; + + HiveTypeEnum(String typeName) { + this.typeName = typeName; + } + + public static HiveTypeEnum getByTypeName(String typeName) { + if (Strings.isNullOrEmpty(typeName)) { + return null; + } + for (HiveTypeEnum hiveType : HiveTypeEnum.values()) { + if (hiveType.typeName.equals(typeName.toLowerCase())) { + return hiveType; + } + } + return null; + } + + public String toString() { + return typeName; + } + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/KylinTableCreator.java ---------------------------------------------------------------------- diff --git a/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/KylinTableCreator.java b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/KylinTableCreator.java new file mode 100644 index 0000000..66d0861 --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/KylinTableCreator.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.kylin.tool.metrics.systemcube; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.util.List; +import java.util.UUID; + +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.common.util.Pair; +import org.apache.kylin.metadata.MetadataManager; +import org.apache.kylin.metadata.model.ColumnDesc; +import org.apache.kylin.metadata.model.TableDesc; +import org.apache.kylin.metrics.lib.ActiveReservoirReporter; +import org.apache.kylin.metrics.lib.SinkTool; +import org.apache.kylin.tool.metrics.systemcube.util.HiveSinkTool; + +import com.google.common.collect.Lists; + +public class KylinTableCreator { + + public static void main(String[] args) throws Exception { + // KylinConfig.setSandboxEnvIfPossible(); + KylinConfig config = KylinConfig.getInstanceFromEnv(); + + TableDesc kylinTable = generateKylinTableForMetricsQuery(config, new HiveSinkTool()); + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + DataOutputStream dout = new DataOutputStream(buf); + MetadataManager.TABLE_SERIALIZER.serialize(kylinTable, dout); + dout.close(); + buf.close(); + System.out.println(buf.toString()); + } + + public static TableDesc generateKylinTableForMetricsQuery(KylinConfig kylinConfig, SinkTool sinkTool) { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.addAll(HiveTableCreator.getHiveColumnsForMetricsQuery()); + columns.addAll(HiveTableCreator.getPartitionKVsForHiveTable()); + return generateKylinTable(sinkTool, kylinConfig.getKylinMetricsSubjectQuery(), columns); + } + + public static TableDesc generateKylinTableForMetricsQueryCube(KylinConfig kylinConfig, SinkTool sinkTool) { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.addAll(HiveTableCreator.getHiveColumnsForMetricsQueryCube()); + columns.addAll(HiveTableCreator.getPartitionKVsForHiveTable()); + return generateKylinTable(sinkTool, kylinConfig.getKylinMetricsSubjectQueryCube(), columns); + } + + 
public static TableDesc generateKylinTableForMetricsQueryRPC(KylinConfig kylinConfig, SinkTool sinkTool) { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.addAll(HiveTableCreator.getHiveColumnsForMetricsQueryRPC()); + columns.addAll(HiveTableCreator.getPartitionKVsForHiveTable()); + return generateKylinTable(sinkTool, kylinConfig.getKylinMetricsSubjectQueryRpcCall(), columns); + } + + public static TableDesc generateKylinTableForMetricsJob(KylinConfig kylinConfig, SinkTool sinkTool) { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.addAll(HiveTableCreator.getHiveColumnsForMetricsJob()); + columns.addAll(HiveTableCreator.getPartitionKVsForHiveTable()); + return generateKylinTable(sinkTool, kylinConfig.getKylinMetricsSubjectJob(), columns); + } + + public static TableDesc generateKylinTableForMetricsJobException(KylinConfig kylinConfig, SinkTool sinkTool) { + List<Pair<String, String>> columns = Lists.newLinkedList(); + columns.addAll(HiveTableCreator.getHiveColumnsForMetricsJobException()); + columns.addAll(HiveTableCreator.getPartitionKVsForHiveTable()); + return generateKylinTable(sinkTool, kylinConfig.getKylinMetricsSubjectJobException(), columns); + } + + public static TableDesc generateKylinTable(SinkTool sinkTool, String subject, List<Pair<String, String>> columns) { + TableDesc kylinTable = new TableDesc(); + + Pair<String, String> tableNameSplits = ActiveReservoirReporter + .getTableNameSplits(sinkTool.getTableNameForMetrics(subject)); + kylinTable.setUuid(UUID.randomUUID().toString()); + kylinTable.setDatabase(tableNameSplits.getFirst()); + kylinTable.setName(tableNameSplits.getSecond()); + kylinTable.setTableType(null); + kylinTable.setLastModified(0L); + kylinTable.setSourceType(sinkTool.getSourceType()); + + ColumnDesc[] columnDescs = new ColumnDesc[columns.size()]; + for (int i = 0; i < columns.size(); i++) { + columnDescs[i] = new ColumnDesc(); + Pair<String, String> entry = columns.get(i); + 
columnDescs[i].setId(Integer.toString(i + 1)); + columnDescs[i].setName(entry.getFirst()); + columnDescs[i].setDatatype(entry.getSecond()); + } + kylinTable.setColumns(columnDescs); + + kylinTable.init(); + + return kylinTable; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/ModelCreator.java ---------------------------------------------------------------------- diff --git a/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/ModelCreator.java b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/ModelCreator.java new file mode 100644 index 0000000..aa8fb6c --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/ModelCreator.java @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.kylin.tool.metrics.systemcube; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.util.List; + +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.metadata.MetadataManager; +import org.apache.kylin.metadata.model.DataModelDesc; +import org.apache.kylin.metadata.model.JoinTableDesc; +import org.apache.kylin.metadata.model.ModelDimensionDesc; +import org.apache.kylin.metadata.model.PartitionDesc; +import org.apache.kylin.metrics.lib.SinkTool; +import org.apache.kylin.metrics.lib.impl.RecordEvent; +import org.apache.kylin.metrics.lib.impl.TimePropertyEnum; +import org.apache.kylin.metrics.property.JobPropertyEnum; +import org.apache.kylin.metrics.property.QueryCubePropertyEnum; +import org.apache.kylin.metrics.property.QueryPropertyEnum; +import org.apache.kylin.metrics.property.QueryRPCPropertyEnum; +import org.apache.kylin.tool.metrics.systemcube.util.HiveSinkTool; + +import com.google.common.collect.Lists; + +public class ModelCreator { + + public static void main(String[] args) throws Exception { + // KylinConfig.setSandboxEnvIfPossible(); + KylinConfig config = KylinConfig.getInstanceFromEnv(); + + DataModelDesc kylinModel = generateKylinModelForMetricsQuery("ADMIN", config, new HiveSinkTool()); + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + DataOutputStream dout = new DataOutputStream(buf); + MetadataManager.MODELDESC_SERIALIZER.serialize(kylinModel, dout); + dout.close(); + buf.close(); + System.out.println(buf.toString()); + } + + public static PartitionDesc getPartitionDesc(String tableName) { + PartitionDesc partitionDesc = new PartitionDesc(); + + partitionDesc.setPartitionDateColumn(tableName + "." + TimePropertyEnum.DAY_DATE.toString()); + partitionDesc.setPartitionTimeColumn(tableName + "." 
+ TimePropertyEnum.DAY_TIME.toString()); + return partitionDesc; + } + + public static DataModelDesc generateKylinModelForMetricsQuery(String owner, KylinConfig kylinConfig, + SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(kylinConfig.getKylinMetricsSubjectQuery()); + return generateKylinModel(owner, tableName, getDimensionsForMetricsQuery(), getMeasuresForMetricsQuery(), + getPartitionDesc(tableName)); + } + + public static DataModelDesc generateKylinModelForMetricsQueryCube(String owner, KylinConfig kylinConfig, + SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(kylinConfig.getKylinMetricsSubjectQueryCube()); + return generateKylinModel(owner, tableName, getDimensionsForMetricsQueryCube(), + getMeasuresForMetricsQueryCube(), getPartitionDesc(tableName)); + } + + public static DataModelDesc generateKylinModelForMetricsQueryRPC(String owner, KylinConfig kylinConfig, + SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(kylinConfig.getKylinMetricsSubjectQueryRpcCall()); + return generateKylinModel(owner, tableName, getDimensionsForMetricsQueryRPC(), getMeasuresForMetricsQueryRPC(), + getPartitionDesc(tableName)); + } + + public static DataModelDesc generateKylinModelForMetricsJob(String owner, KylinConfig kylinConfig, + SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(kylinConfig.getKylinMetricsSubjectJob()); + return generateKylinModel(owner, tableName, getDimensionsForMetricsJob(), getMeasuresForMetricsJob(), + getPartitionDesc(tableName)); + } + + public static DataModelDesc generateKylinModelForMetricsJobException(String owner, KylinConfig kylinConfig, + SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(kylinConfig.getKylinMetricsSubjectJobException()); + return generateKylinModel(owner, tableName, getDimensionsForMetricsJobException(), + getMeasuresForMetricsJobException(), getPartitionDesc(tableName)); + } + + public static List<String> 
getDimensionsForMetricsQuery() { + List<String> result = Lists.newLinkedList(); + result.add(RecordEvent.RecordReserveKeyEnum.HOST.toString()); + result.add(QueryPropertyEnum.USER.toString()); + result.add(QueryPropertyEnum.PROJECT.toString()); + result.add(QueryPropertyEnum.REALIZATION.toString()); + result.add(QueryPropertyEnum.REALIZATION_TYPE.toString()); + result.add(QueryPropertyEnum.TYPE.toString()); + result.add(QueryPropertyEnum.EXCEPTION.toString()); + + result.addAll(getTimeDimensionsForMetrics()); + return result; + } + + public static List<String> getMeasuresForMetricsQuery() { + List<String> result = Lists.newLinkedList(); + result.add(QueryPropertyEnum.ID_CODE.toString()); + result.add(QueryPropertyEnum.TIME_COST.toString()); + result.add(QueryPropertyEnum.CALCITE_RETURN_COUNT.toString()); + result.add(QueryPropertyEnum.STORAGE_RETURN_COUNT.toString()); + result.add(QueryPropertyEnum.AGGR_FILTER_COUNT.toString()); + + return result; + } + + public static List<String> getDimensionsForMetricsQueryCube() { + List<String> result = Lists.newLinkedList(); + result.add(RecordEvent.RecordReserveKeyEnum.HOST.toString()); + result.add(QueryCubePropertyEnum.PROJECT.toString()); + result.add(QueryCubePropertyEnum.CUBE.toString()); + result.add(QueryCubePropertyEnum.SEGMENT.toString()); + result.add(QueryCubePropertyEnum.CUBOID_SOURCE.toString()); + result.add(QueryCubePropertyEnum.CUBOID_TARGET.toString()); + result.add(QueryCubePropertyEnum.IF_MATCH.toString()); + result.add(QueryCubePropertyEnum.IF_SUCCESS.toString()); + + result.addAll(getTimeDimensionsForMetrics()); + return result; + } + + public static List<String> getMeasuresForMetricsQueryCube() { + List<String> result = Lists.newLinkedList(); + result.add(QueryCubePropertyEnum.WEIGHT_PER_HIT.toString()); + result.add(QueryCubePropertyEnum.CALL_COUNT.toString()); + result.add(QueryCubePropertyEnum.TIME_SUM.toString()); + result.add(QueryCubePropertyEnum.TIME_MAX.toString()); + 
result.add(QueryCubePropertyEnum.SKIP_COUNT.toString()); + result.add(QueryCubePropertyEnum.SCAN_COUNT.toString()); + result.add(QueryCubePropertyEnum.RETURN_COUNT.toString()); + result.add(QueryCubePropertyEnum.AGGR_FILTER_COUNT.toString()); + result.add(QueryCubePropertyEnum.AGGR_COUNT.toString()); + + return result; + } + + public static List<String> getDimensionsForMetricsQueryRPC() { + List<String> result = Lists.newLinkedList(); + result.add(RecordEvent.RecordReserveKeyEnum.HOST.toString()); + result.add(QueryRPCPropertyEnum.PROJECT.toString()); + result.add(QueryRPCPropertyEnum.REALIZATION.toString()); + result.add(QueryRPCPropertyEnum.RPC_SERVER.toString()); + result.add(QueryRPCPropertyEnum.EXCEPTION.toString()); + + result.addAll(getTimeDimensionsForMetrics()); + return result; + } + + public static List<String> getMeasuresForMetricsQueryRPC() { + List<String> result = Lists.newLinkedList(); + result.add(QueryRPCPropertyEnum.CALL_TIME.toString()); + result.add(QueryRPCPropertyEnum.RETURN_COUNT.toString()); + result.add(QueryRPCPropertyEnum.SCAN_COUNT.toString()); + result.add(QueryRPCPropertyEnum.SKIP_COUNT.toString()); + result.add(QueryRPCPropertyEnum.AGGR_FILTER_COUNT.toString()); + result.add(QueryRPCPropertyEnum.AGGR_COUNT.toString()); + + return result; + } + + public static List<String> getDimensionsForMetricsJob() { + List<String> result = Lists.newLinkedList(); + result.add(JobPropertyEnum.USER.toString()); + result.add(JobPropertyEnum.PROJECT.toString()); + result.add(JobPropertyEnum.CUBE.toString()); + result.add(JobPropertyEnum.TYPE.toString()); + result.add(JobPropertyEnum.ALGORITHM.toString()); + + result.addAll(getTimeDimensionsForMetrics()); + return result; + } + + public static List<String> getMeasuresForMetricsJob() { + List<String> result = Lists.newLinkedList(); + result.add(JobPropertyEnum.BUILD_DURATION.toString()); + result.add(JobPropertyEnum.SOURCE_SIZE.toString()); + result.add(JobPropertyEnum.CUBE_SIZE.toString()); + 
result.add(JobPropertyEnum.PER_BYTES_TIME_COST.toString()); + result.add(JobPropertyEnum.WAIT_RESOURCE_TIME.toString()); + + result.add(JobPropertyEnum.STEP_DURATION_DISTINCT_COLUMNS.toString()); + result.add(JobPropertyEnum.STEP_DURATION_DICTIONARY.toString()); + result.add(JobPropertyEnum.STEP_DURATION_INMEM_CUBING.toString()); + result.add(JobPropertyEnum.STEP_DURATION_HFILE_CONVERT.toString()); + + return result; + } + + public static List<String> getDimensionsForMetricsJobException() { + List<String> result = Lists.newLinkedList(); + result.add(JobPropertyEnum.USER.toString()); + result.add(JobPropertyEnum.PROJECT.toString()); + result.add(JobPropertyEnum.CUBE.toString()); + result.add(JobPropertyEnum.TYPE.toString()); + result.add(JobPropertyEnum.ALGORITHM.toString()); + result.add(JobPropertyEnum.EXCEPTION.toString()); + + result.addAll(getTimeDimensionsForMetrics()); + return result; + } + + public static List<String> getMeasuresForMetricsJobException() { + List<String> result = Lists.newLinkedList(); + result.add(JobPropertyEnum.ID_CODE.toString()); + + return result; + } + + public static List<String> getTimeDimensionsForMetrics() { + List<String> result = Lists.newLinkedList(); + result.add(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + result.add(TimePropertyEnum.YEAR.toString()); + result.add(TimePropertyEnum.MONTH.toString()); + result.add(TimePropertyEnum.WEEK_BEGIN_DATE.toString()); + result.add(TimePropertyEnum.DAY_TIME.toString()); + result.add(TimePropertyEnum.DAY_DATE.toString()); + result.add(TimePropertyEnum.TIME_HOUR.toString()); + result.add(TimePropertyEnum.TIME_MINUTE.toString()); + + return result; + } + + public static DataModelDesc generateKylinModel(String owner, String tableName, List<String> dimensions, + List<String> measures, PartitionDesc partitionDesc) { + ModelDimensionDesc modelDimensionDesc = new ModelDimensionDesc(); + modelDimensionDesc.setTable(tableName); + modelDimensionDesc.setColumns(dimensions.toArray(new 
String[dimensions.size()])); + + DataModelDesc kylinModel = new DataModelDesc(); + kylinModel.setName(tableName.replace('.', '_')); + kylinModel.setOwner(owner); + kylinModel.setDescription(""); + kylinModel.setLastModified(0L); + kylinModel.setRootFactTableName(tableName); + kylinModel.setJoinTables(new JoinTableDesc[0]); + kylinModel.setDimensions(Lists.newArrayList(modelDimensionDesc)); + kylinModel.setMetrics(measures.toArray(new String[measures.size()])); + kylinModel.setFilterCondition(""); + kylinModel.setPartitionDesc(partitionDesc); + kylinModel.setCapacity(DataModelDesc.RealizationCapacity.SMALL); + kylinModel.updateRandomUuid(); + + return kylinModel; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/74e167f0/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/ProjectCreator.java ---------------------------------------------------------------------- diff --git a/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/ProjectCreator.java b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/ProjectCreator.java new file mode 100644 index 0000000..03e9b6e --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/ProjectCreator.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.tool.metrics.systemcube; + +import java.util.List; + +import javax.annotation.Nullable; + +import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.metadata.model.DataModelDesc; +import org.apache.kylin.metadata.model.TableDesc; +import org.apache.kylin.metadata.project.ProjectInstance; +import org.apache.kylin.metadata.project.ProjectStatusEnum; +import org.apache.kylin.metadata.project.RealizationEntry; +import org.apache.kylin.metadata.realization.RealizationType; +import org.apache.kylin.metrics.MetricsManager; + +import com.google.common.base.Function; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +public class ProjectCreator { + + private static final String SYSTEM_PROJECT_DESC = "This project is kylin's system project for kylin metrics"; + + public static ProjectInstance generateKylinProjectInstance(String owner, List<TableDesc> kylinTables, + List<DataModelDesc> kylinModels, List<CubeDesc> kylinCubeDescs) { + ProjectInstance projectInstance = new ProjectInstance(); + + projectInstance.setName(MetricsManager.SYSTEM_PROJECT); + projectInstance.setOwner(owner); + projectInstance.setDescription(SYSTEM_PROJECT_DESC); + projectInstance.setStatus(ProjectStatusEnum.ENABLED); + projectInstance.setCreateTimeUTC(0L); + projectInstance.updateRandomUuid(); + + if (kylinTables != null) + projectInstance.setTables(Sets.newHashSet(Lists.transform(kylinTables, new Function<TableDesc, String>() { + @Nullable + @Override + public String apply(@Nullable TableDesc tableDesc) { + if (tableDesc != null) { + return tableDesc.getIdentity(); + } + return null; + } + }))); + else + projectInstance.setTables(Sets.<String> newHashSet()); + + if (kylinModels != null) + projectInstance.setModels(Lists.transform(kylinModels, new Function<DataModelDesc, String>() { + @Nullable + @Override + public String 
apply(@Nullable DataModelDesc modelDesc) { + if (modelDesc != null) { + return modelDesc.getName(); + } + return null; + } + })); + else + projectInstance.setModels(Lists.<String> newArrayList()); + + if (kylinCubeDescs != null) + projectInstance + .setRealizationEntries(Lists.transform(kylinCubeDescs, new Function<CubeDesc, RealizationEntry>() { + @Nullable + @Override + public RealizationEntry apply(@Nullable CubeDesc cubeDesc) { + if (cubeDesc != null) { + RealizationEntry entry = new RealizationEntry(); + entry.setRealization(cubeDesc.getName()); + entry.setType(RealizationType.CUBE); + return entry; + } + return null; + } + })); + else + projectInstance.setRealizationEntries(Lists.<RealizationEntry> newArrayList()); + + return projectInstance; + } +}