halfway
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/f6208f8a Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/f6208f8a Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/f6208f8a Branch: refs/heads/KYLIN-2283 Commit: f6208f8ae330cf8260de63909c43a4ea5760c184 Parents: f2377db Author: Li Yang <liy...@apache.org> Authored: Thu Dec 15 18:51:00 2016 +0800 Committer: Li Yang <liy...@apache.org> Committed: Fri Dec 16 16:06:48 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/metadata/model/ColumnDesc.java | 8 ++ .../apache/kylin/metadata/model/TableDesc.java | 9 ++ .../kylin/source/datagen/ColumnGenConfig.java | 86 ++++++++++++ .../kylin/source/datagen/ColumnGenerator.java | 41 ++++++ .../source/datagen/ModelDataGenerator.java | 130 +++++++++++++++++++ .../kylin/source/datagen/TableGenConfig.java | 41 ++++++ .../org/apache/kylin/source/datagen/Util.java | 68 ++++++++++ 7 files changed, 383 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java index 2da1f5e..7105ede 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java @@ -45,6 +45,10 @@ public class ColumnDesc implements Serializable { @JsonInclude(JsonInclude.Include.NON_NULL) private String comment; + @JsonProperty("data_gen") + @JsonInclude(JsonInclude.Include.NON_NULL) + private String dataGen; + // parsed from data type private DataType type; private DataType upgradedType; @@ 
-148,6 +152,10 @@ public class ColumnDesc implements Serializable { public void setNullable(boolean nullable) { this.isNullable = nullable; } + + public String getDataGen() { + return dataGen; + } public void init(TableDesc table) { this.table = table; http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java index ab8c465..e845da1 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java @@ -26,6 +26,7 @@ import org.apache.kylin.common.persistence.RootPersistentEntity; import org.apache.kylin.common.util.StringSplitter; import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.annotation.JsonProperty; @@ -47,6 +48,10 @@ public class TableDesc extends RootPersistentEntity implements ISourceAware { private int sourceType = ISourceAware.ID_HIVE; @JsonProperty("table_type") private String tableType; + + @JsonProperty("data_gen") + @JsonInclude(JsonInclude.Include.NON_NULL) + private String dataGen; private DatabaseDesc database = new DatabaseDesc(); @@ -160,6 +165,10 @@ public class TableDesc extends RootPersistentEntity implements ISourceAware { return getMaxColumnIndex() + 1; } + public String getDataGen() { + return dataGen; + } + public void init() { if (name != null) name = name.toUpperCase(); http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java ---------------------------------------------------------------------- 
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java new file mode 100644 index 0000000..91f5366 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.kylin.source.datagen; + +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.kylin.metadata.model.ColumnDesc; + +public class ColumnGenConfig { + + // discrete values + List<String> values; + + // random + boolean isRandom; + int randCardinality; + + // ID + boolean isID; + int idStart; + + // null handling + boolean genNull; + double genNullPct; + + public ColumnGenConfig(ColumnDesc col) { + init(col.getName(), col.getDataGen()); + } + + private void init(String col, String dataGen) { + + Map<String, String> config = Util.parseEqualCommaPairs(dataGen, "values"); + + values = Arrays.asList(Util.parseString(config, "values", "").split("|")); + + if ("ID".equals(values.get(0))) { + isID = true; + idStart = (values.size() > 1) ? Integer.parseInt(values.get(1)) : 0; + } else { + isRandom = true; + randCardinality = Util.parseInt(config, "cardinality", guessCardinality(col)); + } + + genNull = Util.parseBoolean(config, "genNull", guessGenNull(col)); + genNullPct = Util.parseDouble(config, "genNullPct", 0.01); + } + + private int guessCardinality(String col) { + for (String s : col.split("_")) { + if (s.startsWith("C")) { + try { + return Integer.parseInt(s.substring(1)); + } catch (Exception ex) { + // ok + } + } + } + return 0; + } + + private boolean guessGenNull(String col) { + return col.contains("_NULL"); + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java new file mode 100644 index 0000000..c4c4fe7 --- /dev/null +++ 
b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.source.datagen; + +import java.util.Iterator; + +import org.apache.kylin.metadata.model.ColumnDesc; + +public class ColumnGenerator { + + final private ColumnDesc targetCol; + final private int targetRows; + final private ModelDataGenerator modelGen; + + public ColumnGenerator(ColumnDesc col, int nRows, ModelDataGenerator modelGen) { + this.targetCol = col; + this.targetRows = nRows; + this.modelGen = modelGen; + } + + public Iterator<String> generate() { + return null; + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java new file mode 100644 index 0000000..0f67377 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java @@ -0,0 +1,130 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.source.datagen; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.apache.kylin.common.persistence.ResourceStore; +import org.apache.kylin.metadata.model.ColumnDesc; +import org.apache.kylin.metadata.model.DataModelDesc; +import org.apache.kylin.metadata.model.JoinTableDesc; +import org.apache.kylin.metadata.model.TableDesc; + +import com.google.common.base.Preconditions; + +public class ModelDataGenerator { + + final private DataModelDesc model; + final private int targetRows; + final private ResourceStore outputStore; + final private String outputPath; + + public ModelDataGenerator(DataModelDesc model, int nRows) { + this(model, nRows, ResourceStore.getStore(model.getConfig()), "/data"); + } + + public ModelDataGenerator(DataModelDesc model, int nRows, ResourceStore outputStore, String outputPath) { + this.model = model; + this.targetRows = nRows; + this.outputStore = outputStore; + this.outputPath = outputPath; + } + + public void 
generate() throws IOException { + Set<TableDesc> generated = new HashSet<>(); + + JoinTableDesc[] allTables = model.getJoinTables(); + for (int i = allTables.length - 1; i >= 0; i--) { + TableDesc table = allTables[i].getTableRef().getTableDesc(); + if (generated.contains(table)) + continue; + + boolean gen = generateTable(table); + + if (gen) + generated.add(table); + } + + generateDDL(generated); + } + + private boolean generateTable(TableDesc table) throws IOException { + TableGenConfig config = new TableGenConfig(table); + if (!config.needGen) + return false; + + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + PrintWriter pout = new PrintWriter(new OutputStreamWriter(bout, "UTF-8")); + + generateTableInternal(table, config, pout); + + pout.close(); + bout.close(); + + outputStore.putResource(path(table), new ByteArrayInputStream(bout.toByteArray()), System.currentTimeMillis()); + return true; + } + + private void generateTableInternal(TableDesc table, TableGenConfig config, PrintWriter out) { + ColumnDesc[] columns = table.getColumns(); + ColumnGenerator[] colGens = new ColumnGenerator[columns.length]; + Iterator<String>[] colIters = new Iterator[columns.length]; + + int tableRows = (int) (targetRows * config.scaleFactor); + tableRows = Math.max(1, tableRows); + + for (int i = 0; i < columns.length; i++) { + colGens[i] = new ColumnGenerator(columns[i], tableRows, this); + colIters[i] = colGens[i].generate(); + } + + for (int i = 0; i < tableRows; i++) { + for (int c = 0; c < columns.length; c++) { + if (c > 0) + out.print(","); + + String v = colIters[c].next(); + Preconditions.checkState(v.contains(",") == false); + + out.print(v); + } + out.print("\n"); + } + } + + private void generateDDL(Set<TableDesc> generated) { + // TODO Auto-generated method stub + + } + + private String path(TableDesc table) { + return outputPath + "/" + table.getIdentity() + ".csv"; + } + + public DataModelDesc getModle() { + return model; + } +} 
http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java new file mode 100644 index 0000000..9e6fa73 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.kylin.source.datagen; + +import java.util.Map; + +import org.apache.kylin.metadata.model.TableDesc; + +public class TableGenConfig { + + boolean needGen; + double scaleFactor; + + public TableGenConfig(TableDesc table) { + init(table.getDataGen()); + } + + private void init(String dataGen) { + needGen = !dataGen.isEmpty(); + + Map<String, String> config = Util.parseEqualCommaPairs(dataGen, "scaleFactor"); + scaleFactor = Util.parseDouble(config, "scaleFactor", 1.0); + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/f6208f8a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java new file mode 100644 index 0000000..4b87ee2 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
// Source file: core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java
// (package org.apache.kylin.source.datagen)

import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Parsing helpers for the compact {@code key=value,key=value} configuration
 * strings used by the data generator.
 */
public class Util {

    /**
     * Splits a comma separated list of {@code key=value} pairs into a map that
     * keeps the order of appearance. Keys and values are trimmed. An item
     * without {@code =} is stored under {@code defaultKey}.
     *
     * @param equalCommaPairs the raw string; null or blank yields an empty map
     * @param defaultKey      key assigned to the single item that has no {@code =}
     * @return the parsed pairs, never null
     * @throws IllegalStateException if an item without {@code =} is found while
     *         {@code defaultKey} is already present
     */
    static Map<String, String> parseEqualCommaPairs(String equalCommaPairs, String defaultKey) {
        Map<String, String> r = new LinkedHashMap<>();
        if (equalCommaPairs == null) {
            return r;
        }

        for (String s : equalCommaPairs.split(",")) {
            String item = s.trim();
            if (item.isEmpty()) {
                // an empty segment carries no setting; storing it would shadow the caller's default
                continue;
            }

            int equal = item.indexOf('=');
            if (equal < 0) {
                if (r.containsKey(defaultKey)) {
                    throw new IllegalStateException("Item '" + item + "' has no key but '" + defaultKey
                            + "' is already set in '" + equalCommaPairs + "'");
                }
                r.put(defaultKey, item);
            } else {
                r.put(item.substring(0, equal).trim(), item.substring(equal + 1).trim());
            }
        }
        return r;
    }

    /**
     * @return the value of {@code key} parsed as double, or {@code dft} when the key is absent
     * @throws NumberFormatException if the value is present but not a valid double
     */
    static double parseDouble(Map<String, String> config, String key, double dft) {
        if (config.containsKey(key)) {
            return Double.parseDouble(config.get(key));
        }
        return dft;
    }

    /**
     * @return the value of {@code key} parsed with {@link Boolean#parseBoolean(String)},
     *         or {@code dft} when the key is absent
     */
    static boolean parseBoolean(Map<String, String> config, String key, boolean dft) {
        if (config.containsKey(key)) {
            return Boolean.parseBoolean(config.get(key));
        }
        return dft;
    }

    /**
     * @return the value of {@code key} parsed as int, or {@code dft} when the key is absent
     * @throws NumberFormatException if the value is present but not a valid int
     */
    public static int parseInt(Map<String, String> config, String key, int dft) {
        if (config.containsKey(key)) {
            return Integer.parseInt(config.get(key));
        }
        return dft;
    }

    /**
     * @return the value of {@code key}, or {@code dft} when the key is absent
     */
    public static String parseString(Map<String, String> config, String key, String dft) {
        if (config.containsKey(key)) {
            return config.get(key);
        }
        return dft;
    }
}