halfway
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e79626e7 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e79626e7 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e79626e7 Branch: refs/heads/KYLIN-2283 Commit: e79626e7c35a914d011f0425025f03e8fac1fbf9 Parents: f6208f8 Author: Yang Li <liy...@apache.org> Authored: Fri Dec 16 06:24:31 2016 +0800 Committer: Li Yang <liy...@apache.org> Committed: Fri Dec 16 16:06:48 2016 +0800 ---------------------------------------------------------------------- .../kylin/source/datagen/ColumnGenConfig.java | 30 ++- .../kylin/source/datagen/ColumnGenerator.java | 202 ++++++++++++++++++- 2 files changed, 222 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/e79626e7/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java index 91f5366..f174eef 100644 --- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java @@ -19,30 +19,34 @@ package org.apache.kylin.source.datagen; import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.kylin.metadata.model.ColumnDesc; public class ColumnGenConfig { + public static final String $RANDOM = "${RANDOM}"; + // discrete values + boolean isDiscrete; List<String> values; // random boolean isRandom; - int randCardinality; + String randFormat; + int randStart; + int randEnd; // ID boolean isID; int idStart; - // null handling + // general + int cardinality; boolean genNull; double genNullPct; + boolean order; public ColumnGenConfig(ColumnDesc col) { init(col.getName(), col.getDataGen()); @@ -53,17 +57,25 @@ public class ColumnGenConfig { Map<String, String> config = Util.parseEqualCommaPairs(dataGen, "values"); values = Arrays.asList(Util.parseString(config, "values", "").split("|")); + if (values.size() == 1 && values.get(0).isEmpty()) + values.set(0, $RANDOM); if ("ID".equals(values.get(0))) { isID = true; idStart = (values.size() > 1) ? Integer.parseInt(values.get(1)) : 0; - } else { + } else if (values.get(0).contains($RANDOM)) { isRandom = true; - randCardinality = Util.parseInt(config, "cardinality", guessCardinality(col)); + randFormat = values.get(0); + randStart = (values.size() > 1) ? Integer.parseInt(values.get(1)) : 0; + randEnd = (values.size() > 2) ? Integer.parseInt(values.get(2)) : 0; + } else { + isDiscrete = true; } - genNull = Util.parseBoolean(config, "genNull", guessGenNull(col)); - genNullPct = Util.parseDouble(config, "genNullPct", 0.01); + cardinality = Util.parseInt(config, "card", guessCardinality(col)); + genNull = Util.parseBoolean(config, "null", guessGenNull(col)); + genNullPct = Util.parseDouble(config, "nullPct", 0.01); + order = Util.parseBoolean(config, "order", false); } private int guessCardinality(String col) { http://git-wip-us.apache.org/repos/asf/kylin/blob/e79626e7/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java index c4c4fe7..409a1e9 100644 --- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java @@ -18,24 +18,224 @@ package org.apache.kylin.source.datagen; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.TreeSet; +import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.metadata.model.ColumnDesc; public class ColumnGenerator { + final private ColumnGenConfig conf; final private ColumnDesc targetCol; final private int targetRows; final private ModelDataGenerator modelGen; public ColumnGenerator(ColumnDesc col, int nRows, ModelDataGenerator modelGen) { + this.conf = new ColumnGenConfig(col); this.targetCol = col; this.targetRows = nRows; this.modelGen = modelGen; } public Iterator<String> generate() { - return null; + Iterator<String> result; + if (conf.isID) { + result = new IDIter(conf.idStart); + } else if (conf.isRandom) { + result = new RandomIter(targetCol.getType(), conf.randFormat, conf.randStart, Math.max(conf.randEnd, conf.randStart + conf.cardinality)); + } else { + result = new DiscreteIter(conf.values); + } + + if (conf.cardinality > 0) { + result = new CardinalityIter(result, conf.cardinality); + } + + if (conf.genNull) { + result = new AddNullIter(result, conf.genNullPct); + } + + if (conf.order) { + result = new OrderIter(result, targetRows); + } + + return result; } + private static class RandomIter implements Iterator<String> { + + public RandomIter(DataType type, String format, int randStart, int randEnd) { + // TODO Auto-generated constructor stub + } + + @Override + public boolean hasNext() { + // TODO Auto-generated method stub + return false; + } + + @Override + public String next() { + // TODO Auto-generated method stub + return null; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + private static class IDIter implements Iterator<String> { + + int next; + + public IDIter(int start) { + next = start; + } + + @Override + public boolean hasNext() { + return true; + } + + @Override + public String next() { + return "" + (next++); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + private static class DiscreteIter implements Iterator<String> { + + private List<String> values; + private Random rand = new Random(); + + public DiscreteIter(List<String> values) { + this.values = values; + } + + @Override + public boolean hasNext() { + return true; + } + + @Override + public String next() { + if (values.isEmpty()) + return null; + else + return values.get(rand.nextInt(values.size())); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + private static class CardinalityIter implements Iterator<String> { + + private Iterator<String> input; + private int card; + private TreeSet<String> cache; + + public CardinalityIter(Iterator<String> input, int card) { + assert card > 0; + this.input = input; + this.card = card; + this.cache = new TreeSet<String>(); + } + + @Override + public boolean hasNext() { + return input.hasNext(); + } + + @Override + public String next() { + String r = input.next(); + + if (cache.size() < card) { + cache.add(r); + return r; + } + + r = cache.floor(r); + return r == null ? cache.first() : r; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + private static class AddNullIter implements Iterator<String> { + + private Iterator<String> input; + private double nullPct; + private Random rand = new Random(); + + public AddNullIter(Iterator<String> input, double nullPct) { + this.input = input; + this.nullPct = nullPct; + } + + @Override + public boolean hasNext() { + return true; + } + + @Override + public String next() { + return rand.nextDouble() < nullPct || !input.hasNext() ? null : input.next(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + private static class OrderIter implements Iterator<String> { + + private Iterator<String> iter; + + public OrderIter(Iterator<String> input, int targetRows) { + ArrayList<String> cache = new ArrayList<>(targetRows); + for (int i = 0; i < targetRows; i++) { + cache.add(input.next()); + } + Collections.sort(cache); + + iter = cache.iterator(); + } + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public String next() { + return iter.next(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } }