Repository: flink
Updated Branches:
  refs/heads/master 21207fd52 -> 3586ced35
[FLINK-2043] Change the KMeansDataGenerator to allow passing a custom path

This closes #721

Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/3586ced3
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/3586ced3
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/3586ced3

Branch: refs/heads/master
Commit: 3586ced3550ac036638a8dff011c01de99f9ed5e
Parents: 7164b2b
Author: Pietro Pinoli <pietro.pin...@gmail.com>
Authored: Sun May 24 13:35:35 2015 +0200
Committer: Fabian Hueske <fhue...@apache.org>
Committed: Wed May 27 00:39:08 2015 +0200

----------------------------------------------------------------------
 .../clustering/util/KMeansDataGenerator.java | 23 ++++++++++----------
 1 file changed, 12 insertions(+), 11 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/flink/blob/3586ced3/flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/clustering/util/KMeansDataGenerator.java
----------------------------------------------------------------------
diff --git a/flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/clustering/util/KMeansDataGenerator.java b/flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/clustering/util/KMeansDataGenerator.java
index 897e0ca..2cb7457 100644
--- a/flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/clustering/util/KMeansDataGenerator.java
+++ b/flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/clustering/util/KMeansDataGenerator.java
@@ -52,14 +52,15 @@ public class KMeansDataGenerator {
 	 * <p>
 	 * The generator creates to files:
 	 * <ul>
-	 * <li><code>{tmp.dir}/points</code> for the data points
-	 * <li><code>{tmp.dir}/centers</code> for the cluster centers
+	 * <li><code>< output-path >/points</code> for the data points
+	 * <li><code>< output-path >/centers</code> for the cluster centers
 	 * </ul>
 	 *
 	 * @param args
 	 * <ol>
 	 * <li>Int: Number of data points
 	 * <li>Int: Number of cluster centers
+	 * <li><b>Optional</b> String: Output path, default value is {tmp.dir}
 	 * <li><b>Optional</b> Double: Standard deviation of data points
 	 * <li><b>Optional</b> Double: Value range of cluster centers
 	 * <li><b>Optional</b> Long: Random seed
@@ -69,20 +70,20 @@ public class KMeansDataGenerator {
 
 		// check parameter count
 		if (args.length < 2) {
-			System.out.println("KMeansDataGenerator <numberOfDataPoints> <numberOfClusterCenters> [<relative stddev>] [<centroid range>] [<seed>]");
+			System.out.println("KMeansDataGenerator <numberOfDataPoints> <numberOfClusterCenters> [<output-path>] [<relative stddev>] [<centroid range>] [<seed>]");
 			System.exit(1);
 		}
 
 		// parse parameters
 		final int numDataPoints = Integer.parseInt(args[0]);
 		final int k = Integer.parseInt(args[1]);
-		final double stddev = args.length > 2 ? Double.parseDouble(args[2]) : RELATIVE_STDDEV;
-		final double range = args.length > 3 ? Double.parseDouble(args[4]) : DEFAULT_VALUE_RANGE;
-		final long firstSeed = args.length > 4 ? Long.parseLong(args[4]) : DEFAULT_SEED;
+		final String outDir = args.length > 2 ? args[2] : System.getProperty("java.io.tmpdir");
+		final double stddev = args.length > 3 ? Double.parseDouble(args[3]) : RELATIVE_STDDEV;
+		final double range = args.length > 4 ? Double.parseDouble(args[4]) : DEFAULT_VALUE_RANGE;
+		final long firstSeed = args.length > 5 ? Long.parseLong(args[5]) : DEFAULT_SEED;
 
 		final double absoluteStdDev = stddev * range;
 		final Random random = new Random(firstSeed);
-		final String tmpDir = System.getProperty("java.io.tmpdir");
 
 		// the means around which data points are distributed
 		final double[][] means = uniformRandomCenters(random, k, DIMENSIONALITY, range);
@@ -90,7 +91,7 @@ public class KMeansDataGenerator {
 		// write the points out
 		BufferedWriter pointsOut = null;
 		try {
-			pointsOut = new BufferedWriter(new FileWriter(new File(tmpDir+"/"+POINTS_FILE)));
+			pointsOut = new BufferedWriter(new FileWriter(new File(outDir+"/"+POINTS_FILE)));
 			StringBuilder buffer = new StringBuilder();
 
 			double[] point = new double[DIMENSIONALITY];
@@ -115,7 +116,7 @@ public class KMeansDataGenerator {
 		// write the uniformly distributed centers to a file
 		BufferedWriter centersOut = null;
 		try {
-			centersOut = new BufferedWriter(new FileWriter(new File(tmpDir+"/"+CENTERS_FILE)));
+			centersOut = new BufferedWriter(new FileWriter(new File(outDir+"/"+CENTERS_FILE)));
 			StringBuilder buffer = new StringBuilder();
 
 			double[][] centers = uniformRandomCenters(random, k, DIMENSIONALITY, range);
@@ -130,8 +131,8 @@ public class KMeansDataGenerator {
 			}
 		}
 
-		System.out.println("Wrote "+numDataPoints+" data points to "+tmpDir+"/"+POINTS_FILE);
-		System.out.println("Wrote "+k+" cluster centers to "+tmpDir+"/"+CENTERS_FILE);
+		System.out.println("Wrote "+numDataPoints+" data points to "+outDir+"/"+POINTS_FILE);
+		System.out.println("Wrote "+k+" cluster centers to "+outDir+"/"+CENTERS_FILE);
 	}
 
 	private static double[][] uniformRandomCenters(Random rnd, int num, int dimensionality, double range) {