Repository: flink
Updated Branches:
  refs/heads/master 21207fd52 -> 3586ced35


[FLINK-2043] Change the KMeansDataGenerator to allow passing a custom path

This closes #721


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/3586ced3
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/3586ced3
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/3586ced3

Branch: refs/heads/master
Commit: 3586ced3550ac036638a8dff011c01de99f9ed5e
Parents: 7164b2b
Author: Pietro Pinoli <pietro.pin...@gmail.com>
Authored: Sun May 24 13:35:35 2015 +0200
Committer: Fabian Hueske <fhue...@apache.org>
Committed: Wed May 27 00:39:08 2015 +0200

----------------------------------------------------------------------
 .../clustering/util/KMeansDataGenerator.java    | 23 ++++++++++----------
 1 file changed, 12 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/3586ced3/flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/clustering/util/KMeansDataGenerator.java
----------------------------------------------------------------------
diff --git 
a/flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/clustering/util/KMeansDataGenerator.java
 
b/flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/clustering/util/KMeansDataGenerator.java
index 897e0ca..2cb7457 100644
--- 
a/flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/clustering/util/KMeansDataGenerator.java
+++ 
b/flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/clustering/util/KMeansDataGenerator.java
@@ -52,14 +52,15 @@ public class KMeansDataGenerator {
         * <p>
         * The generator creates to files:
         * <ul>
-        * <li><code>{tmp.dir}/points</code> for the data points
-        * <li><code>{tmp.dir}/centers</code> for the cluster centers
+        * <li><code>&lt; output-path &gt;/points</code> for the data points
+        * <li><code>&lt; output-path &gt;/centers</code> for the cluster 
centers
         * </ul> 
         * 
         * @param args 
         * <ol>
         * <li>Int: Number of data points
         * <li>Int: Number of cluster centers
+        * <li><b>Optional</b> String: Output path, default value is {tmp.dir}
         * <li><b>Optional</b> Double: Standard deviation of data points
         * <li><b>Optional</b> Double: Value range of cluster centers
         * <li><b>Optional</b> Long: Random seed
@@ -69,20 +70,20 @@ public class KMeansDataGenerator {
 
                // check parameter count
                if (args.length < 2) {
-                       System.out.println("KMeansDataGenerator 
<numberOfDataPoints> <numberOfClusterCenters> [<relative stddev>] [<centroid 
range>] [<seed>]");
+                       System.out.println("KMeansDataGenerator 
<numberOfDataPoints> <numberOfClusterCenters> [<output-path>] [<relative 
stddev>] [<centroid range>] [<seed>]");
                        System.exit(1);
                }
 
                // parse parameters
                final int numDataPoints = Integer.parseInt(args[0]);
                final int k = Integer.parseInt(args[1]);
-               final double stddev = args.length > 2 ? 
Double.parseDouble(args[2]) : RELATIVE_STDDEV;
-               final double range = args.length > 3 ? 
Double.parseDouble(args[4]) : DEFAULT_VALUE_RANGE;
-               final long firstSeed = args.length > 4 ? 
Long.parseLong(args[4]) : DEFAULT_SEED;
+               final String outDir = args.length > 2 ? args[2] : 
System.getProperty("java.io.tmpdir");
+               final double stddev = args.length > 3 ? 
Double.parseDouble(args[3]) : RELATIVE_STDDEV;
+               final double range = args.length > 4 ? 
Double.parseDouble(args[4]) : DEFAULT_VALUE_RANGE;
+               final long firstSeed = args.length > 5 ? 
Long.parseLong(args[5]) : DEFAULT_SEED;
                
                final double absoluteStdDev = stddev * range;
                final Random random = new Random(firstSeed);
-               final String tmpDir = System.getProperty("java.io.tmpdir");
                
                // the means around which data points are distributed
                final double[][] means = uniformRandomCenters(random, k, 
DIMENSIONALITY, range);
@@ -90,7 +91,7 @@ public class KMeansDataGenerator {
                // write the points out
                BufferedWriter pointsOut = null;
                try {
-                       pointsOut = new BufferedWriter(new FileWriter(new 
File(tmpDir+"/"+POINTS_FILE)));
+                       pointsOut = new BufferedWriter(new FileWriter(new 
File(outDir+"/"+POINTS_FILE)));
                        StringBuilder buffer = new StringBuilder();
                        
                        double[] point = new double[DIMENSIONALITY];
@@ -115,7 +116,7 @@ public class KMeansDataGenerator {
                // write the uniformly distributed centers to a file
                BufferedWriter centersOut = null;
                try {
-                       centersOut = new BufferedWriter(new FileWriter(new 
File(tmpDir+"/"+CENTERS_FILE)));
+                       centersOut = new BufferedWriter(new FileWriter(new 
File(outDir+"/"+CENTERS_FILE)));
                        StringBuilder buffer = new StringBuilder();
                        
                        double[][] centers = uniformRandomCenters(random, k, 
DIMENSIONALITY, range);
@@ -130,8 +131,8 @@ public class KMeansDataGenerator {
                        }
                }
                
-               System.out.println("Wrote "+numDataPoints+" data points to 
"+tmpDir+"/"+POINTS_FILE);
-               System.out.println("Wrote "+k+" cluster centers to 
"+tmpDir+"/"+CENTERS_FILE);
+               System.out.println("Wrote "+numDataPoints+" data points to 
"+outDir+"/"+POINTS_FILE);
+               System.out.println("Wrote "+k+" cluster centers to 
"+outDir+"/"+CENTERS_FILE);
        }
        
        private static double[][] uniformRandomCenters(Random rnd, int num, int 
dimensionality, double range) {

Reply via email to