http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java new file mode 100644 index 0000000..91db010 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ProductsReader.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datareaders; + +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.List; +import java.util.Map; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.PetSpecies; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategory; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategoryBuilder; + +import com.google.common.collect.Lists; +import com.google.gson.Gson; + +public class ProductsReader +{ + InputStream path; + + public ProductsReader(InputStream path) + { + this.path = path; + } + + protected Product parseProduct(Object productJson) + { + Map<String, Object> fields = (Map<String, Object>) productJson; + Product product = new Product(fields); + return product; + } + + protected ProductCategory parseProductCategory(Object productCategoryObject) throws Exception + { + Map<String, Object> jsonProductCategory = (Map<String, Object>) productCategoryObject; + + ProductCategoryBuilder builder = new ProductCategoryBuilder(); + + for(Map.Entry<String, Object> entry : jsonProductCategory.entrySet()) + { + Object key = entry.getKey(); + Object value = entry.getValue(); + + if(key.equals("category")) + { + builder.setCategory( (String) entry.getValue()); + } + else if(key.equals("species")) + { + for(String species : (List<String>) value) + { + if(species.equals("dog")) + { + builder.addApplicableSpecies(PetSpecies.DOG); + } + else if(species.equals("cat")) + { + builder.addApplicableSpecies(PetSpecies.CAT); + } + else + { + throw new Exception("Invalid species " + species + " encountered when parsing product categories JSON."); + } + } + } + else if(key.equals("trigger_transaction")) + { + builder.setTriggerTransaction((Boolean) entry.getValue()); + } + else if(key.equals("fields")) + { + for(String fieldName : (List<String>) 
value) + { + builder.addFieldName(fieldName); + } + } + else if(key.equals("daily_usage_rate")) + { + builder.setDailyUsageRate((Double) value); + } + else if(key.equals("base_amount_used_average")) + { + builder.setAmountUsedPetPetAverage((Double) value); + } + else if(key.equals("base_amount_used_variance")) + { + builder.setAmountUsedPetPetVariance((Double) value); + } + else if(key.equals("transaction_trigger_rate")) + { + builder.setTriggerTransactionRate((Double) value); + } + else if(key.equals("transaction_purchase_rate")) + { + builder.setTriggerPurchaseRate((Double) value); + } + else if(key.equals("items")) + { + for(Object productJson : (List<Object>) value) + { + Product product = parseProduct(productJson); + builder.addProduct(product); + } + } + else + { + throw new Exception("Invalid field " + key + " encountered when parsing product categories JSON."); + } + + } + + return builder.build(); + } + + public List<ProductCategory> readData() throws Exception + { + Gson gson = new Gson(); + + Reader reader = new InputStreamReader(path); + Object json = gson.fromJson(reader, Object.class); + + List<Object> productCategoryObjects = (List<Object>) json; + + List<ProductCategory> productCategories = Lists.newArrayList(); + + for(Object obj : productCategoryObjects) + { + ProductCategory productCategory = parseProductCategory(obj); + productCategories.add(productCategory); + } + + reader.close(); + + return productCategories; + + } +}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java new file mode 100644 index 0000000..a4ccdd6 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datareaders; + +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Scanner; +import java.util.Set; +import java.util.Vector; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; + +public class ZipcodeReader +{ + private static class ZipcodeLocationRecord + { + public final Pair<Double, Double> coordinates; + public final String state; + public final String city; + + public ZipcodeLocationRecord(Pair<Double, Double> coordinates, + String city, String state) + { + this.coordinates = coordinates; + this.city = city; + this.state = state; + } + } + + InputStream zipcodeIncomesFile = null; + InputStream zipcodePopulationFile = null; + InputStream zipcodeCoordinatesFile = null; + + public void setIncomesFile(InputStream path) + { + this.zipcodeIncomesFile = path; + } + + public void setPopulationFile(InputStream path) + { + this.zipcodePopulationFile = path; + } + + public void setCoordinatesFile(InputStream path) + { + this.zipcodeCoordinatesFile = path; + } + + private ImmutableMap<String, Double> readIncomeData(InputStream path) throws FileNotFoundException + { + Scanner scanner = new Scanner(path); + + // skip headers + scanner.nextLine(); + scanner.nextLine(); + + Map<String, Double> entries = Maps.newHashMap(); + while(scanner.hasNextLine()) + { + String line = scanner.nextLine().trim(); + String[] cols = line.split(","); + // zipcodes are in the form "ZCTA5 XXXXX" + String zipcode = cols[2].split(" ")[1].trim(); + try + { + double medianHouseholdIncome = Integer.parseInt(cols[5].trim()); + entries.put(zipcode, medianHouseholdIncome); + } + 
catch(NumberFormatException e) + { + + } + } + + scanner.close(); + + return ImmutableMap.copyOf(entries); + } + + private ImmutableMap<String, Long> readPopulationData(InputStream path) throws FileNotFoundException + { + Scanner scanner = new Scanner(path); + + // skip header + scanner.nextLine(); + + Map<String, Long> entries = Maps.newHashMap(); + while(scanner.hasNextLine()) + { + String line = scanner.nextLine().trim(); + + if(line.length() == 0) + continue; + + String[] cols = line.split(","); + + String zipcode = cols[0].trim(); + Long population = Long.parseLong(cols[1].trim()); + + if(entries.containsKey(zipcode)) + { + entries.put(zipcode, Math.max(entries.get(zipcode), population)); + } + else + { + entries.put(zipcode, population); + } + } + + scanner.close(); + + return ImmutableMap.copyOf(entries); + } + + private ImmutableMap<String, ZipcodeLocationRecord> readCoordinates(InputStream path) throws FileNotFoundException + { + Scanner scanner = new Scanner(path); + + // skip header + scanner.nextLine(); + + Map<String, ZipcodeLocationRecord> entries = Maps.newHashMap(); + while(scanner.hasNextLine()) + { + String line = scanner.nextLine().trim(); + + String[] cols = line.split(", "); + + // remove quote marks + String zipcode = cols[0].substring(1, cols[0].length() - 1); + String state = cols[1].substring(1, cols[1].length() - 1); + Double latitude = Double.parseDouble(cols[2].substring(1, cols[2].length() - 1)); + Double longitude = Double.parseDouble(cols[3].substring(1, cols[3].length() - 1)); + String city = cols[4].substring(1, cols[4].length() - 1); + + Pair<Double, Double> coords = new Pair<Double, Double>(latitude, longitude); + + ZipcodeLocationRecord record = new ZipcodeLocationRecord(coords, city, state); + + entries.put(zipcode, record); + } + + scanner.close(); + + return ImmutableMap.copyOf(entries); + } + + public ImmutableList<ZipcodeRecord> readData() throws FileNotFoundException + { + ImmutableMap<String, Double> incomes = 
readIncomeData(this.zipcodeIncomesFile); + ImmutableMap<String, Long> populations = readPopulationData(this.zipcodePopulationFile); + ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates(this.zipcodeCoordinatesFile); + + Set<String> zipcodeSubset = new HashSet<String>(incomes.keySet()); + zipcodeSubset.retainAll(populations.keySet()); + zipcodeSubset.retainAll(coordinates.keySet()); + + List<ZipcodeRecord> table = new Vector<ZipcodeRecord>(); + for(String zipcode : zipcodeSubset) + { + ZipcodeRecord record = new ZipcodeRecord(zipcode, + coordinates.get(zipcode).coordinates, + coordinates.get(zipcode).city, + coordinates.get(zipcode).state, + incomes.get(zipcode), + populations.get(zipcode)); + table.add(record); + } + + return ImmutableList.copyOf(table); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java new file mode 100644 index 0000000..aea004e --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
/**
 * Hands out a stream of {@code long} seeds drawn from a single underlying
 * {@link Random}.
 */
public class SeedFactory
{
    Random rng;

    /** Creates a factory backed by a {@link Random} with no explicit seed. */
    public SeedFactory()
    {
        this.rng = new Random();
    }

    /**
     * Creates a factory whose seed sequence is determined by {@code seed}.
     *
     * @param seed seed passed to the underlying {@link Random}
     */
    public SeedFactory(long seed)
    {
        this.rng = new Random(seed);
    }

    /**
     * @return the next {@code long} produced by the underlying generator
     */
    public long getNextSeed()
    {
        final long nextSeed = this.rng.nextLong();
        return nextSeed;
    }
}
/**
 * Holds the weights that describe a Markov model: the start weight of each
 * state and, for each state, the weights of the states that may follow it.
 *
 * The maps passed to the constructor are stored and returned as-is; no
 * copies are made.
 *
 * @param <T> type used to represent a state
 */
public class MarkovModel<T> implements Serializable
{
    final Map<T, Map<T, Double>> transitionWeights;
    final Map<T, Double> startWeights;

    /**
     * @param transitionWeights for each state, the weight of each following state
     * @param startWeights      weight of each state as the first state
     */
    public MarkovModel(Map<T, Map<T, Double>> transitionWeights, Map<T, Double> startWeights)
    {
        this.startWeights = startWeights;
        this.transitionWeights = transitionWeights;
    }

    /** @return the transition weight map given at construction */
    public Map<T, Map<T, Double>> getTransitionWeights()
    {
        return this.transitionWeights;
    }

    /** @return the start weight map given at construction */
    public Map<T, Double> getStartWeights()
    {
        return this.startWeights;
    }
}
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableTable; + +public class MarkovModelBuilder<S> +{ + ImmutableTable.Builder<S, S, Double> transitionWeights; + ImmutableMap.Builder<S, Double> startWeights; + + public MarkovModelBuilder() + { + transitionWeights = ImmutableTable.builder(); + startWeights = ImmutableMap.builder(); + } + + public static <T> MarkovModelBuilder<T> create() + { + return new MarkovModelBuilder<T>(); + } + + public void addStartState(S state, double weight) + { + startWeights.put(state, weight); + } + + public void addTransition(S state1, S state2, double weight) + { + transitionWeights.put(state1, state2, weight); + } + + public MarkovModel<S> build() + { + return new MarkovModel<S>(transitionWeights.build().rowMap(), startWeights.build()); + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java ---------------------------------------------------------------------- diff --git 
a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java new file mode 100644 index 0000000..2a72e65 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels; + +import java.util.Map; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableTable; + +public class MarkovProcess<T> implements Sampler<T> +{ + final ImmutableMap<T, Sampler<T>> transitionSamplers; + final Sampler<T> startStateSampler; + + T currentState; + + + public MarkovProcess(MarkovModel<T> model, SeedFactory factory) + { + Map<T, Map<T, Double>> transitionTable = model.getTransitionWeights(); + + startStateSampler = RouletteWheelSampler.create(model.getStartWeights(), factory); + + ImmutableMap.Builder<T, Sampler<T>> builder = ImmutableMap.builder(); + for(Map.Entry<T, Map<T, Double>> entry : transitionTable.entrySet()) + { + builder.put(entry.getKey(), RouletteWheelSampler.create(entry.getValue(), factory)); + } + + + this.transitionSamplers = builder.build(); + + currentState = null; + } + + public static <T> MarkovProcess<T> create(MarkovModel<T> model, SeedFactory factory) + { + return new MarkovProcess<T>(model, factory); + } + + public T sample() throws Exception + { + if(currentState == null) + { + currentState = startStateSampler.sample(); + return currentState; + } + + currentState = transitionSamplers.get(currentState).sample(); + return currentState; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java ---------------------------------------------------------------------- diff --git 
a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java new file mode 100644 index 0000000..f879870 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +public interface ConditionalProbabilityDensityFunction<T, S> +{ + public double probability(T datum, S conditionalDatum); + + public ProbabilityDensityFunction<T> fixConditional(S conditionalDatum); +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java new file mode 100644 index 0000000..9d0d6f2 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/DiscretePDF.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ImmutableMap; + +public class DiscretePDF<T> implements ProbabilityDensityFunction<T> +{ + private final ImmutableMap<T, Double> probabilities; + + public DiscretePDF(Map<T, Double> probabilities) + { + this.probabilities = ImmutableMap.copyOf(probabilities); + } + + public Set<T> getData() + { + return probabilities.keySet(); + } + + public double probability(T value) + { + if(probabilities.containsKey(value)) + { + return probabilities.get(value); + } + + return 0.0; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java new file mode 100644 index 0000000..dcc1278 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +public class ExponentialPDF implements ProbabilityDensityFunction<Double> +{ + private final double lambda; + + public ExponentialPDF(double lambda) + { + this.lambda = lambda; + } + + public double probability(Double value) + { + return lambda * Math.exp(-1.0 * value * lambda); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java new file mode 100644 index 0000000..55ebc93 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +public class GaussianPDF implements ProbabilityDensityFunction<Double> +{ + private double mean; + private double std; + + public GaussianPDF(double mean, double std) + { + this.mean = mean; + this.std = std; + } + + public double probability(Double value) + { + double diff = (mean - value) * (mean - value); + double var = std * std; + double exp = Math.exp(-1.0 * diff / (2.0 * var)); + + return exp / (std * Math.sqrt(2.0 * Math.PI)); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java new file mode 100644 index 0000000..fdf2db0 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +import java.util.List; + +public class JointPDF<T> implements ProbabilityDensityFunction<T> +{ + + double normalizationFactor; + ProbabilityDensityFunction<T>[] pdfs; + + public JointPDF(List<T> data, ProbabilityDensityFunction<T> ... pdfs) + { + this.pdfs = pdfs; + + normalizationFactor = 0.0d; + for(T datum : data) + { + double prob = 1.0; + for(ProbabilityDensityFunction<T> pdf : pdfs) + prob *= pdf.probability(datum); + normalizationFactor += prob; + } + + } + + public double probability(T datum) + { + double weight = 1.0; + for(ProbabilityDensityFunction<T> pdf : pdfs) + weight *= pdf.probability(datum); + + return weight / normalizationFactor; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java new file mode 100644 index 0000000..1b691ca --- /dev/null +++ 
/**
 * Maps an item of type {@code T} to a probability value.
 *
 * @param <T> type of the items being evaluated
 */
public interface ProbabilityDensityFunction<T> {
    /**
     * @param datum item to evaluate
     * @return the value this function assigns to {@code datum}
     */
    double probability(T datum);
}
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +public class UniformPDF<T> implements ProbabilityDensityFunction<T> +{ + private final double probability; + + public UniformPDF(long count) + { + probability = 1.0 / ((double) count); + } + + public UniformPDF(double probability) + { + this.probability = probability; + } + + public double probability(T datum) + { + return probability; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java new file mode 100644 index 0000000..475b24d --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java 
@@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.List; +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair; +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; + +import com.google.common.collect.ImmutableList; + +public class BoundedMultiModalGaussianSampler implements Sampler<Double> +{ + ImmutableList<Pair<Double, Double>> distributions; + + double min; + double max; + Random rng; + + public BoundedMultiModalGaussianSampler(List<Pair<Double, Double>> distributions, double min, double max, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.distributions = ImmutableList.copyOf(distributions); + + this.min = min; + this.max = max; + } + + public Double sample() + { + int idx = rng.nextInt(distributions.size()); + + double mean = distributions.get(idx).getFirst(); + double std = distributions.get(idx).getSecond(); + + double value = mean + rng.nextGaussian() * std; + + value = Math.min(value, this.max); + value = Math.max(value, this.min); + + return value; + } + +} 
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java new file mode 100644 index 0000000..54506e2 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +public interface ConditionalSampler<T, S> +{ + public T sample(S conditional) throws Exception; + + public Sampler<T> fixConditional(S conditional) throws Exception; +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java new file mode 100644 index 0000000..82e4d2d --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +public class DoubleSequenceSampler implements Sampler<Double> +{ + Double start; + Double end; + Double step; + Double next; + + public DoubleSequenceSampler() + { + start = 0.0; + end = null; + step = 1.0; + next = start; + } + + public DoubleSequenceSampler(Double start) + { + this.start = start; + end = null; + step = 1.0; + next = start; + } + + public DoubleSequenceSampler(Double start, Double end) + { + this.start = start; + this.end = end; + step = 1.0; + next = start; + } + + public DoubleSequenceSampler(Double start, Double end, Double step) + { + this.start = start; + this.end = end; + this.step = step; + next = start; + } + + public Double sample() throws Exception + { + if(end == null || next < end) + { + Double current = next; + next = current + step; + return current; + } + + throw new Exception("All values have been sampled"); + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java new file mode 100644 index 0000000..082f3ac --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; + +public class ExponentialSampler implements Sampler<Double> +{ + final private Random rng; + final private double lambda; + + public ExponentialSampler(double lambda, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.lambda = lambda; + } + + public Double sample() + { + return - Math.log(1.0 - rng.nextDouble()) / lambda; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java new file mode 100644 index 0000000..ed40cc8 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under 
one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; + +public class GaussianSampler implements Sampler<Double> +{ + double mean; + double std; + Random rng; + + public GaussianSampler(double mean, double std, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.mean = mean; + this.std = std; + } + + public Double sample() + { + return rng.nextGaussian() * std + mean; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java new file mode 100644 index 0000000..0db8200 --- /dev/null +++ 
b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction; + + +public class MonteCarloSampler<T> implements Sampler<T> +{ + private final Sampler<T> stateSampler; + private final Random rng; + private final ProbabilityDensityFunction<T> acceptancePDF; + + public MonteCarloSampler(Sampler<T> stateGenerator, + ProbabilityDensityFunction<T> acceptancePDF, + SeedFactory seedFactory) + { + this.acceptancePDF = acceptancePDF; + this.stateSampler = stateGenerator; + + rng = new Random(seedFactory.getNextSeed()); + } + + public T sample() throws Exception + { + while(true) + { + T proposedState = this.stateSampler.sample(); + double probability = acceptancePDF.probability(proposedState); + double r = rng.nextDouble(); + + if(r < probability) + { + return proposedState; + } + } + } + +} 
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java new file mode 100644 index 0000000..72681f8 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/RouletteWheelSampler.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.Collection; +import java.util.Map; +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair; +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.DiscretePDF; +import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; + +public class RouletteWheelSampler<T> implements Sampler<T> +{ + Random rng; + final ImmutableList<Pair<T, Double>> wheel; + + public static <T> RouletteWheelSampler<T> create(Map<T, Double> domainWeights, SeedFactory factory) + { + return new RouletteWheelSampler<T>(domainWeights, factory); + } + + public static <T> RouletteWheelSampler<T> create(DiscretePDF<T> pdf, SeedFactory factory) + { + return new RouletteWheelSampler<T>(pdf.getData(), pdf, factory); + } + + public static <T> RouletteWheelSampler<T> create(Collection<T> data, ProbabilityDensityFunction<T> pdf, SeedFactory factory) + { + return new RouletteWheelSampler<T>(data, pdf, factory); + } + + public static <T> RouletteWheelSampler<T> createUniform(Collection<T> data, SeedFactory factory) + { + Map<T, Double> pdf = Maps.newHashMap(); + for(T datum : data) + { + pdf.put(datum, 1.0); + } + + return create(pdf, factory); + } + + public RouletteWheelSampler(Map<T, Double> domainWeights, SeedFactory factory) + { + this.rng = new Random(factory.getNextSeed()); + this.wheel = this.normalize(domainWeights); + } + + public RouletteWheelSampler(Collection<T> data, ProbabilityDensityFunction<T> pdf, SeedFactory factory) + { + this.rng = new Random(factory.getNextSeed()); + + Map<T, Double> domainWeights = Maps.newHashMap(); + for(T datum : data) + { + double prob = pdf.probability(datum); + domainWeights.put(datum, prob); + } + + this.wheel = 
this.normalize(domainWeights); + } + + private ImmutableList<Pair<T, Double>> normalize(Map<T, Double> domainWeights) + { + double weightSum = 0.0; + for(Map.Entry<T, Double> entry : domainWeights.entrySet()) + { + weightSum += entry.getValue(); + } + + double cumProb = 0.0; + ImmutableList.Builder<Pair<T, Double>> builder = ImmutableList.builder(); + for(Map.Entry<T, Double> entry : domainWeights.entrySet()) + { + double prob = entry.getValue() / weightSum; + cumProb += prob; + + builder.add(Pair.create(entry.getKey(), cumProb)); + } + + return builder.build(); + } + + public T sample() + { + double r = rng.nextDouble(); + for(Pair<T, Double> cumProbPair : wheel) + if(r < cumProbPair.getSecond()) + return cumProbPair.getFirst(); + + throw new IllegalStateException("Invalid state -- RouletteWheelSampler should never fail to sample!"); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java new file mode 100644 index 0000000..08af7e0 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/Sampler.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
/**
 * Source of values of type {@code T}.
 *
 * @param <T> type of the sampled values
 */
public interface Sampler<T> {
    /**
     * @return the next sampled value
     * @throws Exception if a value cannot be produced
     */
    T sample() throws Exception;
}
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +public class SequenceSampler implements Sampler<Integer> +{ + Integer start; + Integer end; + Integer step; + Integer next; + + public SequenceSampler() + { + start = 0; + end = null; + step = 1; + next = start; + } + + public SequenceSampler(Integer start) + { + this.start = start; + end = null; + step = 1; + next = start; + } + + public SequenceSampler(Integer start, Integer end) + { + this.start = start; + this.end = end; + step = 1; + next = start; + } + + public SequenceSampler(Integer start, Integer end, Integer step) + { + this.start = start; + this.end = end; + this.step = step; + next = start; + } + + public Integer sample() throws Exception + { + if(end == null || next < end) + { + Integer current = next; + next = current + step; + return current; + } + + throw new Exception("All values have been sampled"); + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java new file mode 100644 index 0000000..c447692 --- 
/dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/StatefulMonteCarloSampler.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ConditionalProbabilityDensityFunction; + + +public class StatefulMonteCarloSampler<T> implements Sampler<T> +{ + private final Sampler<T> stateSampler; + private final Random rng; + private final ConditionalProbabilityDensityFunction<T, T> acceptancePDF; + private T currentState; + + public StatefulMonteCarloSampler(Sampler<T> stateGenerator, + ConditionalProbabilityDensityFunction<T, T> acceptancePDF, + T initialState, + SeedFactory seedFactory) + { + this.acceptancePDF = acceptancePDF; + this.stateSampler = stateGenerator; + + rng = new Random(seedFactory.getNextSeed()); + + this.currentState = initialState; + } + + public T sample() throws Exception + { + while(true) + { + T proposedState = this.stateSampler.sample(); + double probability = 
acceptancePDF.probability(proposedState, currentState); + double r = rng.nextDouble(); + + if(r < probability) + { + this.currentState = proposedState; + return proposedState; + } + } + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java new file mode 100644 index 0000000..3fdf550 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformIntSampler.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; + +public class UniformIntSampler implements Sampler<Integer> +{ + int lowerbound; + int upperbound; + Random rng; + + /* + * Upperbound is inclusive + */ + public UniformIntSampler(int lowerbound, int upperbound, SeedFactory seedFactory) + { + this.lowerbound = lowerbound; + this.upperbound = upperbound; + rng = new Random(seedFactory.getNextSeed()); + } + + public Integer sample() + { + int range = upperbound + 1 - lowerbound; + return rng.nextInt(range) + lowerbound; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java new file mode 100644 index 0000000..3f78471 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/UniformSampler.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; + +public class UniformSampler implements Sampler<Double> +{ + final Random rng; + final double lowerbound; + final double upperbound; + + public UniformSampler(SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + lowerbound = 0.0; + upperbound = 1.0; + } + + public UniformSampler(double lowerbound, double upperbound, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.lowerbound = lowerbound; + this.upperbound = upperbound; + } + + public Double sample() + { + return (upperbound - lowerbound) * rng.nextDouble() + lowerbound; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java new file mode 100644 index 0000000..21d0109 --- /dev/null +++ 
b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/ConditionalWeightFunction.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.wfs; + +public interface ConditionalWeightFunction<T, S> +{ + public double weight(T datum, S given); + + public WeightFunction<T> fixConditional(S given); +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java new file mode 100644 index 0000000..1145043 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/wfs/WeightFunction.java @@ -0,0 +1,21 @@ +/** + * Licensed to 
/**
 * Assigns a numeric weight to values of type {@code T}.
 *
 * @param <T> type of the datum being weighted
 */
public interface WeightFunction<T>
{
    /**
     * Computes the weight of a single datum.
     *
     * @param datum the value to weight
     * @return the weight of {@code datum}
     */
    double weight(T datum);
}
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.generators.customer; + +import java.util.List; +import java.util.Map; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; + +public class CustomerLocationPDF implements ProbabilityDensityFunction<ZipcodeRecord> +{ + private final Map<ZipcodeRecord, Double> pdf; + + public CustomerLocationPDF(List<ZipcodeRecord> zipcodes, Store store, double averageDistance) + { + this.pdf = build(zipcodes, store, averageDistance); + } + + protected ImmutableMap<ZipcodeRecord, Double> build(List<ZipcodeRecord> zipcodeTable, + Store store, double averageDistance) + { + double lambda = 1.0 / averageDistance; + + Map<ZipcodeRecord, Double> zipcodeWeights = Maps.newHashMap(); + double totalWeight = 0.0; + for(ZipcodeRecord record : zipcodeTable) + { + double dist = record.distance(store.getLocation()); + + double weight = lambda * Math.exp(-1.0 * lambda * dist); + totalWeight += weight; + zipcodeWeights.put(record, weight); + } + + Map<ZipcodeRecord, Double> pdf = Maps.newHashMap(); + for(ZipcodeRecord record : zipcodeTable) + { + pdf.put(record, zipcodeWeights.get(record) 
/ totalWeight); + } + + return ImmutableMap.copyOf(pdf); + } + + public double probability(ZipcodeRecord record) + { + if(!this.pdf.containsKey(record)) + return 0.0; + + return this.pdf.get(record); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java new file mode 100644 index 0000000..4e5689c --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSampler.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.generators.customer; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.ConditionalSampler; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; + +public class CustomerSampler implements Sampler<Customer> +{ + private final Sampler<Integer> idSampler; + private final Sampler<String> firstNameSampler; + private final Sampler<String> lastNameSampler; + private final Sampler<Store> storeSampler; + private final ConditionalSampler<ZipcodeRecord, Store> locationSampler; + + + public CustomerSampler(Sampler<Integer> idSampler, Sampler<String> firstNameSampler, + Sampler<String> lastNameSampler, Sampler<Store> storeSampler, + ConditionalSampler<ZipcodeRecord, Store> locationSampler) + { + this.idSampler = idSampler; + this.firstNameSampler = firstNameSampler; + this.lastNameSampler = lastNameSampler; + this.storeSampler = storeSampler; + this.locationSampler = locationSampler; + } + + public Customer sample() throws Exception + { + Integer id = idSampler.sample(); + Pair<String, String> name = Pair.create(firstNameSampler.sample(), + lastNameSampler.sample()); + Store store = storeSampler.sample(); + ZipcodeRecord location = locationSampler.sample(store); + + return new Customer(id, name, store, location); + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java ---------------------------------------------------------------------- diff --git 
a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java new file mode 100644 index 0000000..209b099 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerSamplerBuilder.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.generators.customer; + +import java.util.List; +import java.util.Map; + +import org.apache.bigtop.bigpetstore.datagenerator.Constants; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.ConditionalSampler; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.SequenceSampler; + +import com.google.common.collect.Maps; + +public class CustomerSamplerBuilder +{ + private final List<Store> stores; + private final InputData inputData; + private final SeedFactory seedFactory; + + public CustomerSamplerBuilder(List<Store> stores, InputData inputData, SeedFactory seedFactory) + { + this.stores = stores; + this.seedFactory = seedFactory; + this.inputData = inputData; + } + + protected ConditionalSampler<ZipcodeRecord, Store> buildLocationSampler() + { + final Map<Store, Sampler<ZipcodeRecord>> locationSamplers = Maps.newHashMap(); + for(Store store : stores) + { + ProbabilityDensityFunction<ZipcodeRecord> locationPDF = new CustomerLocationPDF(inputData.getZipcodeTable(), + store, Constants.AVERAGE_CUSTOMER_STORE_DISTANCE); + Sampler<ZipcodeRecord> locationSampler = RouletteWheelSampler.create(inputData.getZipcodeTable(), locationPDF, seedFactory); + locationSamplers.put(store, locationSampler); + } + + return new 
ConditionalSampler<ZipcodeRecord, Store>() + { + public ZipcodeRecord sample(Store store) throws Exception + { + return locationSamplers.get(store).sample(); + } + + public Sampler<ZipcodeRecord> fixConditional(Store store) + { + return locationSamplers.get(store); + } + }; + } + + public Sampler<Customer> build() + { + ProbabilityDensityFunction<Store> storePDF = new CustomerStorePDF(stores); + + Sampler<Integer> idSampler = new SequenceSampler(); + Sampler<String> firstNameSampler = RouletteWheelSampler.create(inputData.getNames().getFirstNames(), seedFactory); + Sampler<String> lastNameSampler = RouletteWheelSampler.create(inputData.getNames().getLastNames(), seedFactory); + Sampler<Store> storeSampler = RouletteWheelSampler.create(stores, storePDF, seedFactory); + + return new CustomerSampler(idSampler, firstNameSampler, lastNameSampler, storeSampler, buildLocationSampler()); + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java new file mode 100644 index 0000000..400b02a --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/customer/CustomerStorePDF.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.generators.customer; + +import java.util.List; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store; +import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction; + +public class CustomerStorePDF implements ProbabilityDensityFunction<Store> +{ + double populationSum = 0.0; + + public CustomerStorePDF(List<Store> stores) + { + for(Store store : stores) + { + populationSum += (double) store.getLocation().getPopulation(); + } + } + + @Override + public double probability(Store store) + { + return ((double) store.getLocation().getPopulation()) / populationSum; + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java new file mode 100644 index 0000000..cae8794 --- /dev/null +++ 
b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/generators/purchase/MarkovPurchasingModel.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.generators.purchase; + +import java.util.Map; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product; +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels.MarkovModel; +import org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels.MarkovProcess; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; + +public class MarkovPurchasingModel implements PurchasingModel +{ + + private static final long serialVersionUID = 3098355461347511619L; + ImmutableMap<String, MarkovModel<Product>> productCategoryProfiles; + + public MarkovPurchasingModel(Map<String, MarkovModel<Product>> productCategoryProfiles) + { + this.productCategoryProfiles = 
ImmutableMap.copyOf(productCategoryProfiles); + } + + @Override + public ImmutableSet<String> getProductCategories() + { + return productCategoryProfiles.keySet(); + } + + public MarkovModel<Product> getProfile(String productCategory) + { + return productCategoryProfiles.get(productCategory); + } + + @Override + public PurchasingProcesses buildProcesses(SeedFactory seedFactory) + { + Map<String, Sampler<Product>> processes = Maps.newHashMap(); + for(String category : getProductCategories()) + { + MarkovModel<Product> model = getProfile(category); + processes.put(category, new MarkovProcess<Product>(model, seedFactory)); + } + + return new PurchasingProcesses(processes); + } +}
