BIGTOP-1783: Import BigPetStore Data Generator into BigTop
Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/5646c87d Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/5646c87d Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/5646c87d Branch: refs/heads/master Commit: 5646c87d7a30300a3924adb9db781f0b9214ff88 Parents: 92bd683 Author: RJ Nowling <[email protected]> Authored: Fri Mar 27 16:10:20 2015 -0500 Committer: RJ Nowling <[email protected]> Committed: Mon Mar 30 12:03:19 2015 -0500 ---------------------------------------------------------------------- .../bigpetstore-data-generator/README.md | 63 + .../bigpetstore-data-generator/build.gradle | 63 + .../MonteCarloExponentialSamplingExample.groovy | 44 + .../MonteCarloGaussianSamplingExample.groovy | 45 + .../bigpetstore-data-generator/settings.gradle | 16 + .../bigpetstore/datagenerator/Constants.java | 96 + .../datagenerator/CustomerGenerator.java | 41 + .../bigpetstore/datagenerator/DataLoader.java | 65 + .../datagenerator/PurchasingModelGenerator.java | 40 + .../datagenerator/StoreGenerator.java | 38 + .../datagenerator/TransactionGenerator.java | 43 + .../bigpetstore/datagenerator/cli/Driver.java | 230 + .../datagenerator/cli/Simulation.java | 166 + .../datagenerator/datamodels/Customer.java | 58 + .../datagenerator/datamodels/Pair.java | 64 + .../datagenerator/datamodels/PetSpecies.java | 22 + .../datagenerator/datamodels/Product.java | 96 + .../datagenerator/datamodels/Store.java | 51 + .../datagenerator/datamodels/Transaction.java | 68 + .../datamodels/inputs/InputData.java | 56 + .../datagenerator/datamodels/inputs/Names.java | 46 + .../datamodels/inputs/ProductCategory.java | 112 + .../inputs/ProductCategoryBuilder.java | 133 + .../datamodels/inputs/ZipcodeRecord.java | 90 + .../datagenerator/datareaders/NameReader.java | 62 + .../datareaders/ProductsReader.java | 152 + .../datareaders/ZipcodeReader.java | 193 + .../datagenerator/framework/SeedFactory.java | 38 + 
.../framework/markovmodels/MarkovModel.java | 41 + .../markovmodels/MarkovModelBuilder.java | 53 + .../framework/markovmodels/MarkovProcess.java | 69 + .../ConditionalProbabilityDensityFunction.java | 23 + .../framework/pdfs/DiscretePDF.java | 46 + .../framework/pdfs/ExponentialPDF.java | 31 + .../framework/pdfs/GaussianPDF.java | 37 + .../datagenerator/framework/pdfs/JointPDF.java | 49 + .../pdfs/ProbabilityDensityFunction.java | 21 + .../framework/pdfs/UniformPDF.java | 36 + .../BoundedMultiModalGaussianSampler.java | 58 + .../framework/samplers/ConditionalSampler.java | 23 + .../samplers/DoubleSequenceSampler.java | 70 + .../framework/samplers/ExponentialSampler.java | 37 + .../framework/samplers/GaussianSampler.java | 39 + .../framework/samplers/MonteCarloSampler.java | 55 + .../samplers/RouletteWheelSampler.java | 111 + .../framework/samplers/Sampler.java | 21 + .../framework/samplers/SequenceSampler.java | 70 + .../samplers/StatefulMonteCarloSampler.java | 60 + .../framework/samplers/UniformIntSampler.java | 43 + .../framework/samplers/UniformSampler.java | 46 + .../wfs/ConditionalWeightFunction.java | 23 + .../framework/wfs/WeightFunction.java | 21 + .../customer/CustomerLocationPDF.java | 69 + .../generators/customer/CustomerSampler.java | 56 + .../customer/CustomerSamplerBuilder.java | 85 + .../generators/customer/CustomerStorePDF.java | 41 + .../purchase/MarkovPurchasingModel.java | 64 + .../purchase/MarkovPurchasingModelSampler.java | 47 + .../ProductCategoryMarkovModelSampler.java | 119 + .../purchase/ProductCategoryPDFSampler.java | 117 + .../generators/purchase/PurchasingModel.java | 29 + .../purchase/PurchasingModelSamplerBuilder.java | 152 + .../purchase/PurchasingProcesses.java | 45 + .../purchase/StaticPurchasingModel.java | 61 + .../purchase/StaticPurchasingModelSampler.java | 49 + .../store/StoreLocationIncomePDF.java | 65 + .../store/StoreLocationPopulationPDF.java | 43 + .../generators/store/StoreSampler.java | 45 + 
.../generators/store/StoreSamplerBuilder.java | 57 + .../transaction/CategoryWeightFunction.java | 51 + .../transaction/CustomerInventory.java | 65 + .../transaction/CustomerInventoryBuilder.java | 69 + .../CustomerTransactionParameters.java | 73 + .../CustomerTransactionParametersBuilder.java | 58 + .../CustomerTransactionParametersSampler.java | 61 + ...omerTransactionParametersSamplerBuilder.java | 55 + .../transaction/ProductCategoryInventory.java | 58 + .../ProductCategoryUsageSimulator.java | 72 + .../ProductCategoryUsageTrajectory.java | 74 + .../ProposedPurchaseTimeSampler.java | 49 + .../TransactionPurchasesHiddenMarkovModel.java | 121 + .../TransactionPurchasesSamplerBuilder.java | 70 + .../transaction/TransactionSampler.java | 56 + .../transaction/TransactionSamplerBuilder.java | 95 + .../transaction/TransactionTimePDF.java | 45 + .../TransactionTimeSamplerBuilder.java | 56 + .../ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt | 33 + .../ACS_12_5YR_S1903_metadata.csv | 123 + .../ACS_12_5YR_S1903_with_ann.csv | 33122 ++++ .../resources/input_data/namedb/LICENSE.txt | 274 + .../resources/input_data/namedb/data/data.dat | 129036 ++++++++++++++++ .../resources/input_data/namedb/namedb.info | 13 + .../resources/input_data/population_data.csv | 33090 ++++ .../input_data/product_categories.json | 440 + .../src/main/resources/input_data/zips.csv | 33179 ++++ .../datagenerator/datamodels/TestProduct.java | 77 + .../markovmodels/TestMarkovModelBuilder.java | 76 + .../markovmodels/TestMarkovProcess.java | 53 + .../TestBoundedMultiModalGaussianSampler.java | 50 + .../samplers/TestExponentialSampler.java | 41 + .../framework/samplers/TestGaussianSampler.java | 43 + .../samplers/TestRouletteWheelSampler.java | 71 + .../framework/samplers/TestSequenceSampler.java | 38 + .../samplers/TestUniformIntSampler.java | 60 + .../customer/TestCustomerLocationPDF.java | 58 + .../customer/TestCustomerSampler.java | 113 + .../customer/TestCustomerSamplerBuilder.java | 79 + 
.../TestProductCategoryMarkovModelSampler.java | 114 + .../purchase/TestPurchasingModelSampler.java | 106 + .../TestPurchasingModelSamplerBuilder.java | 106 + .../purchase/TestPurchasingProcesses.java | 70 + .../store/TestStoreLocationIncomePDF.java | 49 + .../store/TestStoreLocationPopulationPDF.java | 49 + .../generators/store/TestStoreSampler.java | 59 + .../store/TestStoreSamplerBuilder.java | 59 + .../transaction/TestCustomerInventory.java | 94 + .../TestCustomerInventoryBuilder.java | 91 + .../TestCustomerTransactionParameters.java | 69 + ...estCustomerTransactionParametersBuilder.java | 48 + ...estCustomerTransactionParametersSampler.java | 47 + ...omerTransactionParametersSamplerBuilder.java | 47 + .../TestProductCategoryInventory.java | 75 + .../TestProductCategoryUsageSimulator.java | 53 + .../TestProductCategoryUsageTrajectory.java | 70 + ...stTransactionPurchasesHiddenMarkovModel.java | 205 + .../transaction/TestTransactionTimePDF.java | 46 + 126 files changed, 237072 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/README.md ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/README.md b/bigtop-bigpetstore/bigpetstore-data-generator/README.md new file mode 100644 index 0000000..d34e5c6 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/README.md @@ -0,0 +1,63 @@ +BigPetStore Data Generator +========================== + +BigPetStore ... + +Data Generator ... + +======= +Building and Testing +-------------------- +We use the Gradle build system for the BPS data generator so you'll need +to install Gradle on your system. +Once that's done, you can use gradle to run the included unit tests +and build the data generator jar. 
+ +To build: + + $ gradle build + +This will create several directories and a jar located at: + + build/libs/bigpetstore-data-generator-0.9.0-SNAPSHOT.jar + +Building automatically runs the included unit tests. If you would prefer +to just run the unit tests, you can do so by: + + $ gradle test + + +To clean up the build files, run: + + $ gradle clean + + +Running the Data Generator +-------------------------- +The data generator can be used as a library (for incorporating in +Hadoop or Spark applications) or using a command-line interface. +The data generator CLI requires several parameters. To get +descriptions: + + $ java -jar build/libs/bigpetstore-data-generator-0.9.0-SNAPSHOT.jar + +Here is an example for generating 10 stores, 1000 customers, 100 purchasing models, +and a year of transactions: + + $ java -jar build/libs/bigpetstore-data-generator-0.9.0-SNAPSHOT.jar generatedData/ 10 1000 100 365.0 + + +Groovy Drivers for Scripting +---------------------------- +Several Groovy example script drivers are included in the `groovy_example_drivers` directory. +Groovy scripts can be used to easily call and interact with classes in the data generator +jar without having to create separate Java projects or worry about compilation. I've found +them to be very useful for interactive exploration and validating my implementations +when unit tests alone aren't sufficient. + +To use Groovy scripts, you will need to have Groovy installed on your system. Build the +data generator as instructed above. 
Then run the scripts in the `groovy_example_drivers` +directory as so: + + $ groovy -classpath ../build/libs/bigpetstore-data-generator-0.9.0-SNAPSHOT.jar MonteCarloExponentialSamplingExample.groovy + http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/build.gradle ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/build.gradle b/bigtop-bigpetstore/bigpetstore-data-generator/build.gradle new file mode 100644 index 0000000..957529d --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/build.gradle @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +apply plugin: 'eclipse' +apply plugin: 'groovy' +apply plugin: 'java' + +group = 'org.apache.bigtop' +version = '0.9.0-SNAPSHOT' + +jar { + + from { + configurations.runtime.collect { + it.isDirectory() ? 
it : zipTree(it) + } + } + + manifest { + attributes 'Title': 'BigPetStore Data Generator', 'Version': version + attributes 'Main-Class': 'org.apache.bigtop.bigpetstore.datagenerator.cli.Driver' + } +} + +repositories { + mavenLocal() + mavenCentral() +} + +test { + // show standard out and error on console + testLogging.showStandardStreams = true + + // listen to events in the test execution lifecycle + beforeTest { descriptor -> + logger.lifecycle("Running test: " + descriptor) + } + + // listen to standard out and standard error of the test JVM(s) + onOutput { descriptor, event -> + logger.lifecycle("Test: " + descriptor + " produced standard out/err: " + event.message ) + } + +} + +dependencies { + compile 'com.google.guava:guava:18.0' + compile 'com.google.code.gson:gson:2.3' + + testCompile 'junit:junit:4.+' +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/groovy_example_drivers/MonteCarloExponentialSamplingExample.groovy ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/groovy_example_drivers/MonteCarloExponentialSamplingExample.groovy b/bigtop-bigpetstore/bigpetstore-data-generator/groovy_example_drivers/MonteCarloExponentialSamplingExample.groovy new file mode 100644 index 0000000..7f11fed --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/groovy_example_drivers/MonteCarloExponentialSamplingExample.groovy @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
/*
 * Example driver: draws values from a MonteCarloSampler configured with an
 * ExponentialPDF, prints one sample, then prints the expected average next to
 * the average observed over a batch of samples.
 */
import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.MonteCarloSampler
import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ExponentialPDF
import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.UniformSampler
import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory

// Average the samples are expected to converge to; the PDF receives its reciprocal.
def averageValue = 2.0
// Number of samples folded into the observed average.
def sampleCount = 10000

def seedFactory = new SeedFactory()

// NOTE(review): presumably the uniform sampler proposes candidates on [0, 100]
// and the PDF argument is the exponential rate -- confirm against MonteCarloSampler
// and ExponentialPDF.
def proposalSampler = new UniformSampler(0.0, 100.0, seedFactory)
def exponentialPdf = new ExponentialPDF(1.0 / averageValue)
def mcSampler = new MonteCarloSampler(proposalSampler, exponentialPdf, seedFactory)

println("Sampled the value: " + mcSampler.sample())

// Accumulate a fresh batch of samples (the one printed above is not included).
def sampleSum = 0.0
for (draw in 0..<sampleCount) {
    sampleSum += mcSampler.sample()
}

println("Expected Average: " + averageValue)
println("Observed Average: " + (sampleSum / sampleCount))
/*
 * Example driver: draws values from a MonteCarloSampler configured with a
 * GaussianPDF, prints one sample, then prints the expected average next to
 * the average observed over a batch of samples.
 */
import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.MonteCarloSampler
import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.GaussianPDF
import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.UniformSampler
import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory

// Parameters handed to GaussianPDF; the first is also printed as the expected average.
def averageValue = 10.0
def stdValue = 2.0
// Number of samples folded into the observed average.
def sampleCount = 100000

def seedFactory = new SeedFactory()

// NOTE(review): presumably the uniform sampler proposes candidates on [-100, 100]
// -- confirm against MonteCarloSampler.
def proposalSampler = new UniformSampler(-100.0, 100.0, seedFactory)
def gaussianPdf = new GaussianPDF(averageValue, stdValue)
def mcSampler = new MonteCarloSampler(proposalSampler, gaussianPdf, seedFactory)

println("Sampled the value: " + mcSampler.sample())

// Accumulate a fresh batch of samples (the one printed above is not included).
def sampleSum = 0.0
for (draw in 0..<sampleCount) {
    sampleSum += mcSampler.sample()
}

println("Expected Average: " + averageValue)
println("Observed Average: " + (sampleSum / sampleCount))
mode 100644 index 0000000..cf398e8 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/settings.gradle @@ -0,0 +1,16 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +rootProject.name = "bigpetstore-data-generator" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/Constants.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/Constants.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/Constants.java new file mode 100644 index 0000000..90b363e --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/Constants.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator; + +import java.io.File; +import java.util.List; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair; + +import com.google.common.collect.ImmutableList; + +public class Constants +{ + public static enum PurchasingModelType + { + STATIC, + DYNAMIC; + } + + public static enum DistributionType + { + BOUNDED_MULTIMODAL_GAUSSIAN, + EXPONENTIAL; + } + + public static final File COORDINATES_FILE = new File("zips.csv"); + public static final File INCOMES_FILE = new File("ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv"); + public static final File POPULATION_FILE = new File("population_data.csv"); + + public static final File NAMEDB_FILE = new File("namedb/data/data.dat"); + + public static final File PRODUCTS_FILE = new File("product_categories.json"); + + public static final double INCOME_SCALING_FACTOR = 100.0; + + public static final int MIN_PETS = 1; + public static final int MAX_PETS = 10; + + public static final List<Pair<Double, Double>> TRANSACTION_TRIGGER_TIME_GAUSSIANS = ImmutableList.of(Pair.create(5.0, 2.0)); + public static final List<Pair<Double, Double>> PURCHASE_TRIGGER_TIME_GAUSSIANS = ImmutableList.of(Pair.create(10.0, 4.0)); + + public static final double TRANSACTION_TRIGGER_TIME_MAX = 10.0; + public static final double TRANSACTION_TRIGGER_TIME_MIN = 1.0; + + public static final double 
PURCHASE_TRIGGER_TIME_MAX = 20.0; + public static final double PURCHASE_TRIGGER_TIME_MIN = 1.0; + + public static final double AVERAGE_CUSTOMER_STORE_DISTANCE = 5.0; // miles + + public static final PurchasingModelType PURCHASING_MODEL_TYPE = PurchasingModelType.DYNAMIC; + + public static final List<Pair<Double, Double>> PRODUCT_MSM_FIELD_WEIGHT_GAUSSIANS = ImmutableList.of(Pair.create(0.15, 0.1), Pair.create(0.85, 0.1)); + public static final double PRODUCT_MSM_FIELD_WEIGHT_LOWERBOUND = 0.05; + public static final double PRODUCT_MSM_FIELD_WEIGHT_UPPERBOUND = 0.95; + + public static final List<Pair<Double, Double>> PRODUCT_MSM_FIELD_SIMILARITY_WEIGHT_GAUSSIANS = ImmutableList.of(Pair.create(0.15, 0.1), Pair.create(0.85, 0.1)); + public static final double PRODUCT_MSM_FIELD_SIMILARITY_WEIGHT_LOWERBOUND = 0.05; + public static final double PRODUCT_MSM_FIELD_SIMILARITY_WEIGHT_UPPERBOUND = 0.95; + + public static final List<Pair<Double, Double>> PRODUCT_MSM_LOOPBACK_WEIGHT_GAUSSIANS = ImmutableList.of(Pair.create(0.25, 0.1), Pair.create(0.75, 0.1)); + public static final double PRODUCT_MSM_LOOPBACK_WEIGHT_LOWERBOUND = 0.05; + public static final double PRODUCT_MSM_LOOPBACK_WEIGHT_UPPERBOUND = 0.95; + + public static final DistributionType STATIC_PURCHASING_MODEL_FIELD_WEIGHT_DISTRIBUTION_TYPE = DistributionType.BOUNDED_MULTIMODAL_GAUSSIAN; + public static final DistributionType STATIC_PURCHASING_MODEL_FIELD_VALUE_WEIGHT_DISTRIBUTION_TYPE = DistributionType.EXPONENTIAL; + + public static final List<Pair<Double, Double>> STATIC_FIELD_WEIGHT_GAUSSIANS = ImmutableList.of(Pair.create(0.15, 0.1), Pair.create(0.85, 0.1)); + public static final double STATIC_FIELD_WEIGHT_LOWERBOUND = 0.05; + public static final double STATIC_FIELD_WEIGHT_UPPERBOUND = 0.95; + + public static final List<Pair<Double, Double>> STATIC_FIELD_VALUE_WEIGHT_GAUSSIANS = ImmutableList.of(Pair.create(0.15, 0.1), Pair.create(0.85, 0.1)); + public static final double STATIC_FIELD_VALUE_WEIGHT_LOWERBOUND = 
0.05; + public static final double STATIC_FIELD_VALUE_WEIGHT_UPPERBOUND = 0.95; + + public static final double STATIC_FIELD_WEIGHT_EXPONENTIAL = 0.25; + public static final double STATIC_FIELD_VALUE_WEIGHT_EXPONENTIAL = 2.0; + + + public static final String PRODUCT_QUANTITY = "size"; + public static final String PRODUCT_CATEGORY = "category"; + + public static final double STOP_CATEGORY_WEIGHT = 0.01; +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/CustomerGenerator.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/CustomerGenerator.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/CustomerGenerator.java new file mode 100644 index 0000000..799b2e3 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/CustomerGenerator.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator; + +import java.util.List; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData; +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; +import org.apache.bigtop.bigpetstore.datagenerator.generators.customer.CustomerSamplerBuilder; + +public class CustomerGenerator +{ + final Sampler<Customer> sampler; + + public CustomerGenerator(InputData inputData, List<Store> stores, SeedFactory seedFactory) + { + CustomerSamplerBuilder builder = new CustomerSamplerBuilder(stores, inputData, seedFactory); + sampler = builder.build(); + } + + public Customer generate() throws Exception + { + return sampler.sample(); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/DataLoader.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/DataLoader.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/DataLoader.java new file mode 100644 index 0000000..fbd158d --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/DataLoader.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.InputStream; +import java.util.Collection; +import java.util.List; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.Names; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategory; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.bigpetstore.datagenerator.datareaders.NameReader; +import org.apache.bigtop.bigpetstore.datagenerator.datareaders.ProductsReader; +import org.apache.bigtop.bigpetstore.datagenerator.datareaders.ZipcodeReader; + +public class DataLoader +{ + private InputStream getResource(File filename) throws Exception + { + InputStream stream = getClass().getResourceAsStream("/input_data/" + filename); + return new BufferedInputStream(stream); + } + + public InputData loadData() throws Exception + { + + System.out.println("Reading zipcode data"); + ZipcodeReader zipcodeReader = new ZipcodeReader(); + zipcodeReader.setCoordinatesFile(getResource(Constants.COORDINATES_FILE)); + zipcodeReader.setIncomesFile(getResource(Constants.INCOMES_FILE)); + zipcodeReader.setPopulationFile(getResource(Constants.POPULATION_FILE)); + List<ZipcodeRecord> zipcodeTable = 
zipcodeReader.readData(); + System.out.println("Read " + zipcodeTable.size() + " zipcode entries"); + + System.out.println("Reading name data"); + NameReader nameReader = new NameReader(getResource(Constants.NAMEDB_FILE)); + Names names = nameReader.readData(); + System.out.println("Read " + names.getFirstNames().size() + " first names and " + names.getLastNames().size() + " last names"); + + System.out.println("Reading product data"); + ProductsReader reader = new ProductsReader(getResource(Constants.PRODUCTS_FILE)); + Collection<ProductCategory> productCategories = reader.readData(); + System.out.println("Read " + productCategories.size() + " product categories"); + + InputData inputData = new InputData(zipcodeTable, names, productCategories); + + return inputData; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/PurchasingModelGenerator.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/PurchasingModelGenerator.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/PurchasingModelGenerator.java new file mode 100644 index 0000000..6994d9e --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/PurchasingModelGenerator.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator; + +import java.util.Collection; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; +import org.apache.bigtop.bigpetstore.datagenerator.generators.purchase.PurchasingModel; +import org.apache.bigtop.bigpetstore.datagenerator.generators.purchase.PurchasingModelSamplerBuilder; + +public class PurchasingModelGenerator +{ + final Sampler<? 
extends PurchasingModel> sampler; + + public PurchasingModelGenerator(Collection<ProductCategory> productCategories, SeedFactory seedFactory) throws Exception + { + PurchasingModelSamplerBuilder builder = new PurchasingModelSamplerBuilder(productCategories, seedFactory); + sampler = builder.build(); + } + + public PurchasingModel generate() throws Exception + { + return sampler.sample(); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/StoreGenerator.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/StoreGenerator.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/StoreGenerator.java new file mode 100644 index 0000000..df6d759 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/StoreGenerator.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData; +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; +import org.apache.bigtop.bigpetstore.datagenerator.generators.store.StoreSamplerBuilder; + +public class StoreGenerator +{ + final Sampler<Store> sampler; + + public StoreGenerator(InputData inputData, SeedFactory seedFactory) + { + StoreSamplerBuilder builder = new StoreSamplerBuilder(inputData.getZipcodeTable(), seedFactory); + sampler = builder.build(); + } + + public Store generate() throws Exception + { + return sampler.sample(); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/TransactionGenerator.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/TransactionGenerator.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/TransactionGenerator.java new file mode 100644 index 0000000..8f78bb7 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/TransactionGenerator.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator; + +import java.util.Collection; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Transaction; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; +import org.apache.bigtop.bigpetstore.datagenerator.generators.purchase.PurchasingModel; +import org.apache.bigtop.bigpetstore.datagenerator.generators.transaction.TransactionSamplerBuilder; + +public class TransactionGenerator +{ + Sampler<Transaction> sampler; + + public TransactionGenerator(Customer customer, PurchasingModel profile, + Collection<ProductCategory> productCategories, SeedFactory seedFactory) throws Exception + { + sampler = new TransactionSamplerBuilder(productCategories, + customer, profile, seedFactory).build(); + } + + public Transaction generate() throws Exception + { + return sampler.sample(); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java 
b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java new file mode 100644 index 0000000..6dc3b59 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java @@ -0,0 +1,230 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.cli; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.util.Collection; +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.DataLoader; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Transaction; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData; + + +public class Driver +{ + int nStores; + int nCustomers; + int nPurchasingModels; + double simulationTime; + long seed; + File outputDir; + + static final int NPARAMS = 6; + + private void printUsage() + { + String usage = "BigPetStore Data Generator\n" + + "\n" + + "Usage: java -jar bps-data-generator-v0.2.java outputDir nStores nCustomers nPurchasingModels simulationLength [seed]\n" + + "\n" + + "outputDir - (string) directory to write files\n" + + "nStores - (int) number of stores to generate\n" + + "nCustomers - (int) number of customers to generate\n" + + "nPurchasingModels - (int) number of purchasing models to generate\n" + + "simulationLength - (float) number of days to simulate\n" + + "seed - (long) seed for RNG. If not given, one is reandomly generated.\n"; + + System.out.println(usage); + } + + public void parseArgs(String[] args) + { + if(args.length != NPARAMS && args.length != (NPARAMS - 1)) + { + printUsage(); + System.exit(1); + } + + int i = -1; + + outputDir = new File(args[++i]); + if(! outputDir.exists()) + { + System.err.println("Given path (" + args[i] + ") does not exist.\n"); + printUsage(); + System.exit(1); + } + + if(! 
outputDir.isDirectory()) + { + System.err.println("Given path (" + args[i] + ") is not a directory.\n"); + printUsage(); + System.exit(1); + } + + try + { + nStores = Integer.parseInt(args[++i]); + } + catch(Exception e) + { + System.err.println("Unable to parse '" + args[i] + "' as an integer for nStores.\n"); + printUsage(); + System.exit(1); + } + + try + { + nCustomers = Integer.parseInt(args[++i]); + } + catch(Exception e) + { + System.err.println("Unable to parse '" + args[i] + "' as an integer for nCustomers.\n"); + printUsage(); + System.exit(1); + } + + try + { + nPurchasingModels = Integer.parseInt(args[++i]); + } + catch(Exception e) + { + System.err.println("Unable to parse '" + args[i] + "' as an integer for nPurchasingModels.\n"); + printUsage(); + System.exit(1); + } + + try + { + simulationTime = Double.parseDouble(args[++i]); + } + catch(Exception e) + { + System.err.println("Unable to parse '" + args[i] + "' as a float for simulationLength.\n"); + printUsage(); + System.exit(1); + } + + if(args.length == NPARAMS) + { + try + { + seed = Long.parseLong(args[++i]); + } + catch(Exception e) + { + System.err.println("Unable to parse '" + args[i] + "' as a long for the seed.\n"); + printUsage(); + System.exit(1); + } + } + else + { + seed = (new Random()).nextLong(); + } + } + + private void writeTransactions(Collection<Transaction> transactions) throws Exception + { + File outputFile = new File(outputDir.toString() + File.separator + "transactions.txt"); + System.out.println(outputFile.toString()); + OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + + for(Transaction transaction : transactions) + { + for(Product product : transaction.getProducts()) + { + String record = transaction.getId() + ","; + record += transaction.getDateTime() + ","; + record += transaction.getStore().getId() + ","; + record += transaction.getStore().getLocation().getZipcode() + ","; + record += 
transaction.getStore().getLocation().getCity() + ","; + record += transaction.getStore().getLocation().getState() + ","; + record += transaction.getCustomer().getId() + ","; + Pair<String, String> name = transaction.getCustomer().getName(); + record += name.getFirst() + " " + name.getSecond() + ","; + record += transaction.getCustomer().getLocation().getZipcode() + ","; + record += transaction.getCustomer().getLocation().getCity() + ","; + record += transaction.getCustomer().getLocation().getState() + ","; + record += product.toString() + "\n"; + + outputStream.write(record.getBytes()); + } + } + + outputStream.close(); + } + + public Simulation buildSimulation(InputData inputData) + { + return new Simulation(inputData, nStores, nCustomers, nPurchasingModels, simulationTime, seed); + } + + private void run(InputData inputData) throws Exception + { + Simulation simulation = buildSimulation(inputData); + + simulation.simulate(); + + writeTransactions(simulation.getTransactions()); + } + public void run(String[] args) throws Exception + { + parseArgs(args); + + InputData inputData = (new DataLoader()).loadData(); + + run(inputData); + } + + public static void main(String[] args) throws Exception + { + Driver driver = new Driver(); + driver.run(args); + } + + public Double getSimulationLength() + { + return simulationTime; + } + + public int getNCustomers() + { + return nCustomers; + } + + public long getSeed() + { + return seed; + } + + public int getNStores() + { + return nStores; + } + + public File getOutputDir() + { + return outputDir; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java 
b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java new file mode 100644 index 0000000..b3e07af --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.cli; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Vector; + +import org.apache.bigtop.bigpetstore.datagenerator.CustomerGenerator; +import org.apache.bigtop.bigpetstore.datagenerator.PurchasingModelGenerator; +import org.apache.bigtop.bigpetstore.datagenerator.StoreGenerator; +import org.apache.bigtop.bigpetstore.datagenerator.TransactionGenerator; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Transaction; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData; +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; +import org.apache.bigtop.bigpetstore.datagenerator.generators.purchase.PurchasingModel; + +import com.google.common.collect.Lists; + +public class Simulation +{ + InputData inputData; + SeedFactory seedFactory; + int nStores; + int nCustomers; + int nPurchasingModels; + double simulationTime; + + List<Store> stores; + List<Customer> customers; + Sampler<PurchasingModel> purchasingModelSampler; + List<Transaction> transactions; + + public Simulation(InputData inputData, int nStores, int nCustomers, int nPurchasingModels, double simulationTime, long seed) + { + this.inputData = inputData; + this.nStores = nStores; + this.nCustomers = nCustomers; + this.nPurchasingModels = nPurchasingModels; + this.simulationTime = simulationTime; + seedFactory = new SeedFactory(seed); + } + + public void generateStores() throws Exception + { + System.out.println("Generating stores"); + StoreGenerator storeGenerator = new StoreGenerator(inputData, seedFactory); + + stores = new 
Vector<Store>(); + for(int i = 0; i < nStores; i++) + { + Store store = storeGenerator.generate(); + stores.add(store); + } + + stores = Collections.unmodifiableList(stores); + + System.out.println("Generated " + stores.size() + " stores"); + } + + public void generateCustomers() throws Exception + { + System.out.println("Generating customers"); + CustomerGenerator generator = new CustomerGenerator(inputData, stores, seedFactory); + + customers = new Vector<Customer>(); + for(int i = 0; i < nCustomers; i++) + { + Customer customer = generator.generate(); + customers.add(customer); + } + + customers = Collections.unmodifiableList(customers); + + System.out.println("Generated " + customers.size() + " customers"); + } + + public void generatePurchasingProfiles() throws Exception + { + System.out.println("Generating purchasing profiles"); + PurchasingModelGenerator generator = new PurchasingModelGenerator(inputData.getProductCategories(), seedFactory); + + Collection<PurchasingModel> purchasingProfiles = new Vector<PurchasingModel>(); + for(int i = 0; i < nPurchasingModels; i++) + { + PurchasingModel profile = generator.generate(); + purchasingProfiles.add(profile); + } + + System.out.println("Generated " + purchasingProfiles.size() + " purchasing profiles"); + + purchasingModelSampler = RouletteWheelSampler.createUniform(purchasingProfiles, seedFactory); + } + + public void generateTransactions() throws Exception + { + System.out.println("Generating transactions"); + transactions = Lists.newArrayList(); + + for(int i = 0; i < nCustomers; i++) + { + Customer customer = customers.get(i); + PurchasingModel profile = purchasingModelSampler.sample(); + + TransactionGenerator generator = new TransactionGenerator(customer, + profile, inputData.getProductCategories(), seedFactory); + + while(true) + { + Transaction transaction = generator.generate(); + + if(transaction.getDateTime() > simulationTime) + break; + transactions.add(transaction); + } + } + + 
System.out.println("Generated " + transactions.size() + " transactions"); + } + + public void simulate() throws Exception + { + generateStores(); + generateCustomers(); + generatePurchasingProfiles(); + generateTransactions(); + } + + public List<Store> getStores() + { + return stores; + } + + public List<Customer> getCustomers() + { + return customers; + } + + public List<Transaction> getTransactions() + { + return transactions; + } + + public InputData getInputData() + { + return inputData; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java new file mode 100644 index 0000000..cda0656 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels; + +import java.io.Serializable; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord; + +public class Customer implements Serializable +{ + private static final long serialVersionUID = 5739806281335931258L; + + int id; + Pair<String, String> name; + ZipcodeRecord location; + Store store; + + public Customer(int id, Pair<String, String> name, Store store, ZipcodeRecord location) + { + this.id = id; + this.name = name; + this.location = location; + this.store = store; + } + + public int getId() + { + return id; + } + + public Pair<String, String> getName() + { + return name; + } + + public ZipcodeRecord getLocation() + { + return location; + } + + public Store getStore() + { + return store; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Pair.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Pair.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Pair.java new file mode 100644 index 0000000..9f3053b --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Pair.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
/**
 * Serializable two-element tuple. Two pairs are equal when both of their
 * components are equal.
 *
 * @param <A> type of the first component
 * @param <B> type of the second component
 */
public class Pair<A, B> implements Serializable
{
    private static final long serialVersionUID = -105021821052665898L;

    A first;
    B second;

    /**
     * @param first first component (may be {@code null})
     * @param second second component (may be {@code null})
     */
    public Pair(A first, B second)
    {
        this.first = first;
        this.second = second;
    }

    /** @return the first component */
    public A getFirst()
    {
        return first;
    }

    /** @return the second component */
    public B getSecond()
    {
        return second;
    }

    /**
     * Static factory that lets the compiler infer the type arguments.
     *
     * @return a new pair holding the two arguments
     */
    public static <A, B> Pair<A, B> create(A first, B second)
    {
        return new Pair<A, B>(first, second);
    }

    /**
     * Converts a map into a list of (key, value) pairs, in the iteration
     * order of the map's entry set.
     *
     * @param map source map
     * @return a new mutable list with one pair per map entry
     */
    public static <A, B> List<Pair<A, B>> create(Map<A, B> map)
    {
        List<Pair<A, B>> list = new java.util.ArrayList<Pair<A, B>>(map.size());
        for(Map.Entry<A, B> entry : map.entrySet())
        {
            list.add(Pair.create(entry.getKey(), entry.getValue()));
        }
        return list;
    }

    /** Component-wise equality; {@code null} components are equal only to {@code null}. */
    @Override
    public boolean equals(Object other)
    {
        if(this == other)
        {
            return true;
        }
        if(!(other instanceof Pair))
        {
            return false;
        }

        Pair<?, ?> that = (Pair<?, ?>) other;
        boolean sameFirst = (first == null) ? that.first == null : first.equals(that.first);
        boolean sameSecond = (second == null) ? that.second == null : second.equals(that.second);
        return sameFirst && sameSecond;
    }

    /** Hash code consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode()
    {
        int firstHash = (first == null) ? 0 : first.hashCode();
        int secondHash = (second == null) ? 0 : second.hashCode();
        return 31 * firstHash + secondHash;
    }

    /** @return {@code "Pair(first, second)"} */
    @Override
    public String toString()
    {
        return "Pair(" + first + ", " + second + ")";
    }
}
b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/PetSpecies.java new file mode 100644 index 0000000..d4bab79 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/PetSpecies.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * The pet species known to the data generator.
 */
public enum PetSpecies
{
    DOG, CAT
}
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels; + +import java.io.Serializable; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +public class Product implements Serializable +{ + private static final long serialVersionUID = 4519472063058037956L; + + ImmutableMap<String, Object> fields; + + public Product(Map<String, Object> fields) + { + this.fields = ImmutableMap.copyOf(fields); + } + + public ImmutableSet<String> getFieldNames() + { + return fields.keySet(); + } + + public Object getFieldValue(String fieldName) + { + return fields.get(fieldName); + } + + public String getFieldValueAsString(String fieldName) + { + return fields.get(fieldName).toString(); + } + + public Double getFieldValueAsDouble(String fieldName) + { + Object value = getFieldValue(fieldName); + try + { + Double doubleValue = (Double) value; + return doubleValue; + } + catch(ClassCastException e) + { + return null; + } + } + + public Long getFieldValueAsLong(String fieldName) + { + Object value = getFieldValue(fieldName); + try + { + Long longValue = (Long) value; + return longValue; + } + catch(ClassCastException e) + { + try + { + Integer intValue = (Integer) value; + return new Long(intValue); + } + catch(ClassCastException f) + { + return null; + } + } + } + + public String toString() + { + String str = ""; + for(Map.Entry<String, Object> entry : fields.entrySet()) + { + str += entry.getKey() + "=" + entry.getValue() + ";"; + } + + return str; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java 
b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java new file mode 100644 index 0000000..50e333a --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels; + +import java.io.Serializable; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord; + +public class Store implements Serializable +{ + private static final long serialVersionUID = 2347066623022747969L; + + int id; + String name; + ZipcodeRecord location; + + public Store(int id, String name, ZipcodeRecord location) + { + this.id = id; + this.name = name; + this.location = location; + } + + public int getId() + { + return id; + } + + public String getName() + { + return name; + } + + public ZipcodeRecord getLocation() + { + return location; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java new file mode 100644 index 0000000..6a78e44 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels; + +import java.io.Serializable; +import java.util.List; + +import com.google.common.collect.ImmutableList; + +public class Transaction implements Serializable +{ + private static final long serialVersionUID = 103133601154354349L; + + final int id; + final Customer customer; + final Store store; + final Double dateTime; + final ImmutableList<Product> products; + + public Transaction(int id, Customer customer, Store store, Double dateTime, List<Product> products) + { + this.id = id; + this.customer = customer; + this.store = store; + this.dateTime = dateTime; + this.products = ImmutableList.copyOf(products); + } + + public int getId() + { + return id; + } + + public Customer getCustomer() + { + return customer; + } + + public Store getStore() + { + return store; + } + + public Double getDateTime() + { + return dateTime; + } + + public ImmutableList<Product> getProducts() + { + return products; + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java new file mode 100644 index 
0000000..def554c --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs; + +import java.io.Serializable; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +public class InputData implements Serializable +{ + private static final long serialVersionUID = 9078989799806707788L; + + List<ZipcodeRecord> zipcodeTable; + Names names; + Collection<ProductCategory> productCategories; + + public InputData(List<ZipcodeRecord> zipcodeTable, + Names names, + Collection<ProductCategory> productCategories) + { + this.zipcodeTable = Collections.unmodifiableList(zipcodeTable); + this.names = names; + this.productCategories = Collections.unmodifiableCollection(productCategories); + } + + public List<ZipcodeRecord> getZipcodeTable() + { + return zipcodeTable; + } + + public Names getNames() + { + return names; + } + + public Collection<ProductCategory> getProductCategories() + { + return productCategories; + } + + +} 
http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java new file mode 100644 index 0000000..9402e02 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs; + +import java.io.Serializable; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; + +public class Names implements Serializable +{ + private static final long serialVersionUID = 2731634747628534453L; + + final ImmutableMap<String, Double> firstNames; + final ImmutableMap<String, Double> lastNames; + + public Names(Map<String, Double> firstNames, + Map<String, Double> lastNames) + { + this.firstNames = ImmutableMap.copyOf(firstNames); + this.lastNames = ImmutableMap.copyOf(lastNames); + } + + public ImmutableMap<String, Double> getFirstNames() + { + return firstNames; + } + + public ImmutableMap<String, Double> getLastNames() + { + return lastNames; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java new file mode 100644 index 0000000..db40288 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs; + +import java.io.Serializable; +import java.util.List; +import java.util.Set; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.PetSpecies; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +public class ProductCategory implements Serializable +{ + private static final long serialVersionUID = -7638076590334497836L; + + String categoryLabel; + ImmutableSet<PetSpecies> applicableSpecies; + ImmutableSet<String> fieldNames; + boolean triggerTransaction; + double dailyUsageRate; + double amountUsedPerPetAverage; + double amountUsedPerPetVariance; + double triggerTransactionRate; + double triggerPurchaseRate; + ImmutableList<Product> products; + + public ProductCategory(String categoryLabel, Set<PetSpecies> species, Set<String> fieldNames, + boolean triggerTransaction, double dailyUsageRate, double amountUsedPerPetAverage, + double amountUsedPerPetVariance, double triggerTransactionRate, + double triggerPurchaseRate, List<Product> products) + { + this.categoryLabel = categoryLabel; + this.applicableSpecies = ImmutableSet.copyOf(species); + this.fieldNames = ImmutableSet.copyOf(fieldNames); + this.triggerTransaction = triggerTransaction; + this.dailyUsageRate = dailyUsageRate; + this.amountUsedPerPetAverage = amountUsedPerPetAverage; + this.amountUsedPerPetVariance = amountUsedPerPetVariance; + this.triggerTransactionRate = triggerTransactionRate; + 
this.triggerPurchaseRate = triggerPurchaseRate; + this.products = ImmutableList.copyOf(products); + } + + public String getCategoryLabel() + { + return categoryLabel; + } + + public ImmutableSet<PetSpecies> getApplicableSpecies() + { + return applicableSpecies; + } + + public ImmutableSet<String> getFieldNames() + { + return fieldNames; + } + public Boolean getTriggerTransaction() + { + return triggerTransaction; + } + + public Double getDailyUsageRate() + { + return dailyUsageRate; + } + + public Double getBaseAmountUsedAverage() + { + return amountUsedPerPetAverage; + } + + public Double getBaseAmountUsedVariance() + { + return amountUsedPerPetVariance; + } + + public Double getTransactionTriggerRate() + { + return triggerTransactionRate; + } + + public Double getPurchaseTriggerRate() + { + return triggerPurchaseRate; + } + + public ImmutableList<Product> getProducts() + { + return products; + } + + + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategoryBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategoryBuilder.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategoryBuilder.java new file mode 100644 index 0000000..ee5fd62 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategoryBuilder.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs; + +import java.util.List; +import java.util.Set; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.PetSpecies; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +public class ProductCategoryBuilder +{ + String categoryLabel; + Set<PetSpecies> applicableSpecies; + Set<String> fieldNames; + Boolean triggerTransaction; + Double dailyUsageRate; + Double amountUsedPerPetAverage; + Double amountUsedPerPetVariance; + Double triggerTransactionRate; + Double triggerPurchaseRate; + List<Product> products; + + public ProductCategoryBuilder() + { + applicableSpecies = Sets.newHashSet(); + fieldNames = Sets.newHashSet(); + products = Lists.newArrayList(); + + dailyUsageRate = 0.0; + amountUsedPerPetAverage = 0.0; + amountUsedPerPetVariance = 0.0; + triggerTransactionRate = 0.0; + triggerPurchaseRate = 0.0; + triggerTransaction = false; + categoryLabel = null; + } + + public void setCategory(String category) + { + this.categoryLabel = category; + } + + public void setTriggerTransaction(Boolean triggerTransaction) + { + this.triggerTransaction = triggerTransaction; + } + + public void setDailyUsageRate(Double dailyUsageRate) + { + this.dailyUsageRate = dailyUsageRate; + } + + public void 
setAmountUsedPetPetAverage(Double baseAmountUsedAverage) + { + this.amountUsedPerPetAverage = baseAmountUsedAverage; + } + + public void setAmountUsedPetPetVariance(Double baseAmountUsedVariance) + { + this.amountUsedPerPetVariance = baseAmountUsedVariance; + } + + public void setTriggerTransactionRate(Double triggerTransactionRate) + { + this.triggerTransactionRate = triggerTransactionRate; + } + + public void setTriggerPurchaseRate(Double triggerPurchaseRate) + { + this.triggerPurchaseRate = triggerPurchaseRate; + } + + public void addApplicableSpecies(PetSpecies species) + { + this.applicableSpecies.add(species); + } + + public void addFieldName(String fieldName) + { + this.fieldNames.add(fieldName); + } + + public void addProduct(Product product) + { + this.products.add(product); + } + + protected boolean validateProducts() + { + for(Product product : products) + { + for(String fieldName : product.getFieldNames()) + { + if(!fieldNames.contains(fieldName)) + return false; + } + + for(String fieldName : fieldNames) + { + if(!product.getFieldNames().contains(fieldName)) + return false; + } + } + + return true; + } + + public ProductCategory build() + { + validateProducts(); + + return new ProductCategory(categoryLabel, applicableSpecies, fieldNames, triggerTransaction, + dailyUsageRate, amountUsedPerPetAverage, amountUsedPerPetVariance, triggerTransactionRate, + triggerPurchaseRate, products); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java new 
file mode 100644 index 0000000..e74df22 --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs; + +import java.io.Serializable; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Pair; + +public class ZipcodeRecord implements Serializable +{ + private static final long serialVersionUID = 1769986686070108470L; + + final String zipcode; + final Pair<Double, Double> coordinates; + final String city; + final String state; + final double medianHouseholdIncome; + final long population; + + public ZipcodeRecord(String zipcode, Pair<Double, Double> coordinates, + String city, String state, double medianHouseholdIncome, long population) + { + this.city = city; + this.state = state; + this.zipcode = zipcode; + this.coordinates = coordinates; + this.medianHouseholdIncome = medianHouseholdIncome; + this.population = population; + } + + public String getZipcode() + { + return zipcode; + } + + public Pair<Double, Double> getCoordinates() + { + return coordinates; + } + + public double getMedianHouseholdIncome() + { + return medianHouseholdIncome; + } + + public long getPopulation() + { + return population; + } + + public double distance(ZipcodeRecord other) + { + if(other.getZipcode().equals(zipcode)) + return 0.0; + + Pair<Double, Double> otherCoords = other.getCoordinates(); + + double dist = Math.sin(Math.toRadians(coordinates.getFirst())) * + Math.sin(Math.toRadians(otherCoords.getFirst())) + + Math.cos(Math.toRadians(coordinates.getFirst())) * + Math.cos(Math.toRadians(otherCoords.getFirst())) * + Math.cos(Math.toRadians(coordinates.getSecond() - otherCoords.getSecond())); + dist = Math.toDegrees(Math.acos(dist)) * 69.09; + + return dist; + } + + public String getCity() + { + return city; + } + + public String getState() + { + return state; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/5646c87d/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java 
---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java new file mode 100644 index 0000000..5a847ea --- /dev/null +++ b/bigtop-bigpetstore/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datareaders; + +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.util.Map; +import java.util.Scanner; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.Names; + +import com.google.common.collect.Maps; + +public class NameReader +{ + InputStream path; + + public NameReader(InputStream path) + { + this.path = path; + } + + public Names readData() throws FileNotFoundException + { + Scanner scanner = new Scanner(path); + + Map<String, Double> firstNames = Maps.newHashMap(); + Map<String, Double> lastNames = Maps.newHashMap(); + + while(scanner.hasNextLine()) + { + String line = scanner.nextLine(); + String[] cols = line.trim().split(","); + + String name = cols[0]; + double weight = Double.parseDouble(cols[5]); + + if(cols[4].equals("1")) + firstNames.put(name, weight); + if(cols[3].equals("1")) + lastNames.put(name, weight); + } + + scanner.close(); + + return new Names(firstNames, lastNames); + + } +}
