Author: srowen
Date: Wed Jan 26 23:21:41 2011
New Revision: 1063916

URL: http://svn.apache.org/viewvc?rev=1063916&view=rev
Log:
MAHOUT-594 Replace FileWriter and FileReader usage with idiom that properly 
specifies character encoding

Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java
    
mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
 Wed Jan 26 23:21:41 2011
@@ -42,15 +42,18 @@ import org.apache.mahout.math.stats.Onli
 
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.Reader;
+import java.io.Writer;
 import java.lang.reflect.Type;
+import java.nio.charset.Charset;
 import java.util.List;
 
 /**
@@ -89,7 +92,7 @@ public final class ModelSerializer {
   }
 
   public static void writeJson(String path, OnlineLearner model) throws 
IOException {
-    OutputStreamWriter out = new FileWriter(path);
+    Writer out = new OutputStreamWriter(new FileOutputStream(new File(path)), 
Charset.forName("UTF-8"));
     try {
       out.write(gson().toJson(model));
     } finally {
@@ -414,6 +417,8 @@ public final class ModelSerializer {
   }
 
   public static void main(String[] args) throws FileNotFoundException {
-    OnlineLogisticRegression m = ModelSerializer.loadJsonFrom(new 
FileReader("/tmp/news-group-1000.model"), OnlineLogisticRegression.class);
+    loadJsonFrom(new InputStreamReader(new FileInputStream(new 
File("/tmp/news-group-1000.model")),
+                                       Charset.forName("UTF-8")),
+                 OnlineLogisticRegression.class);
   }
 }

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
 Wed Jan 26 23:21:41 2011
@@ -19,8 +19,10 @@ package org.apache.mahout.cf.taste.hadoo
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.FilenameFilter;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.List;
 
@@ -212,7 +214,8 @@ public final class ItemSimilarityTest ex
         return name.startsWith("part-");
       }
     })[0];
-    BufferedReader reader = new BufferedReader(new FileReader(outPart));
+    BufferedReader reader = new BufferedReader(
+        new InputStreamReader(new FileInputStream(outPart), 
Charset.forName("UTF-8")));
 
     String line;
     int currentLine = 1;
@@ -308,7 +311,8 @@ public final class ItemSimilarityTest ex
         return name.startsWith("part-");
       }
     })[0];
-    BufferedReader reader = new BufferedReader(new FileReader(outPart));
+    BufferedReader reader = new BufferedReader(
+        new InputStreamReader(new FileInputStream(outPart), 
Charset.forName("UTF-8")));
 
     String line;
     int currentLine = 1;

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java
 Wed Jan 26 23:21:41 2011
@@ -17,9 +17,11 @@
 
 package org.apache.mahout.fpm.pfpgrowth;
 
-import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -54,7 +56,7 @@ public final class PFPGrowthTest extends
     File input = new File(inputDir, "test.txt");
     params.set(PFPGrowth.INPUT, input.getAbsolutePath());
     params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
-    BufferedWriter writer = new BufferedWriter(new FileWriter(input));
+    Writer writer = new OutputStreamWriter(new FileOutputStream(input), 
Charset.forName("UTF-8"));
     try {
       Collection<List<String>> transactions = new ArrayList<List<String>>();
       transactions.add(Arrays.asList("E", "A", "D", "B"));

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java
 Wed Jan 26 23:21:41 2011
@@ -20,7 +20,6 @@ package org.apache.mahout.classifier.sgd
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
 import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
 import com.google.gson.InstanceCreator;
 import com.google.gson.JsonArray;
 import com.google.gson.JsonDeserializationContext;
@@ -34,12 +33,13 @@ import org.apache.mahout.math.DenseMatri
 import org.apache.mahout.math.Matrix;
 
 import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.Writer;
 import java.lang.reflect.Type;
+import java.nio.charset.Charset;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -145,7 +145,7 @@ public class LogisticModelParameters {
    * @throws IOException If there is an error opening or closing the file.
    */
   public static LogisticModelParameters loadFrom(File in) throws IOException {
-    InputStreamReader input = new FileReader(in);
+    Reader input = new InputStreamReader(new FileInputStream(in), 
Charset.forName("UTF-8"));
     try {
       return loadFrom(input);
     } finally {

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
 Wed Jan 26 23:21:41 2011
@@ -31,11 +31,12 @@ import org.apache.mahout.vectorizer.enco
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.io.PrintWriter;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.List;
 import java.util.Random;
 
@@ -84,7 +85,8 @@ public class SimpleCsvExamples {
       }
       out.close();
     } else if ("--parse".equals(args[0])) {
-      BufferedReader in = new BufferedReader(new FileReader(args[1]));
+      BufferedReader in = new BufferedReader(
+          new InputStreamReader(new FileInputStream(new File(args[1])), 
Charset.forName("UTF-8")));
       String line = in.readLine();
       while (line != null) {
         v.assign(0);

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
 Wed Jan 26 23:21:41 2011
@@ -31,13 +31,16 @@ import org.apache.mahout.math.RandomAcce
 import org.apache.mahout.math.Vector;
 
 import java.io.BufferedReader;
-import java.io.FileReader;
-import java.io.FileWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.PrintStream;
-import java.net.URL;
+import java.io.Writer;
+import java.nio.charset.Charset;
 import java.util.List;
 
 
@@ -101,7 +104,7 @@ public final class TrainLogistic {
         in.close();
       }
 
-      OutputStreamWriter modelOutput = new FileWriter(outputFile);
+      Writer modelOutput = new OutputStreamWriter(new 
FileOutputStream(outputFile), Charset.forName("UTF-8"));
       try {
         lmp.saveTo(modelOutput);
       } finally {
@@ -299,13 +302,12 @@ public final class TrainLogistic {
   }
 
   static BufferedReader open(String inputFile) throws IOException {
-    InputStreamReader s;
+    InputStream in;
     try {
-      URL resource = Resources.getResource(inputFile);
-      s = new InputStreamReader(resource.openStream());
+      in= Resources.getResource(inputFile).openStream();
     } catch (IllegalArgumentException e) {
-      s = new FileReader(inputFile);
+      in = new FileInputStream(new File(inputFile));
     }
-    return new BufferedReader(s);
+    return new BufferedReader(new InputStreamReader(in, 
Charset.forName("UTF-8")));
   }
 }

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
 Wed Jan 26 23:21:41 2011
@@ -42,10 +42,12 @@ import org.apache.mahout.vectorizer.enco
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.StringReader;
+import java.nio.charset.Charset;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Collection;
@@ -214,7 +216,8 @@ public final class TrainNewsGroups {
       }
       if (k % (bump * scale) == 0) {
         if (learningAlgorithm.getBest() != null) {
-          ModelSerializer.writeBinary("/tmp/news-group-" + k + ".model", 
learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));
+          ModelSerializer.writeBinary("/tmp/news-group-" + k + ".model",
+                                      
learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));
         }
 
         step += 0.25;
@@ -227,7 +230,8 @@ public final class TrainNewsGroups {
     dissect(leakType, newsGroups, learningAlgorithm, files);
     System.out.println("exiting main");
 
-    ModelSerializer.writeBinary("/tmp/news-group.model", 
learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));
+    ModelSerializer.writeBinary("/tmp/news-group.model",
+                                
learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));
 
     List<Integer> counts = Lists.newArrayList();
     System.out.printf("Word counts\n");
@@ -270,8 +274,9 @@ public final class TrainNewsGroups {
     List<String> ngNames = Lists.newArrayList(newsGroups.values());
     List<ModelDissector.Weight> weights = md.summary(100);
     for (ModelDissector.Weight w : weights) {
-      System.out.printf("%s\t%.1f\t%s\t%.1f\t%s\t%.1f\t%s\n", w.getFeature(), 
w.getWeight(), ngNames.get(w.getMaxImpact() + 1),
-        w.getCategory(1), w.getWeight(1), w.getCategory(2), w.getWeight(2));
+      System.out.printf("%s\t%.1f\t%s\t%.1f\t%s\t%.1f\t%s\n",
+                        w.getFeature(), w.getWeight(), 
ngNames.get(w.getMaxImpact() + 1),
+                        w.getCategory(1), w.getWeight(1), w.getCategory(2), 
w.getWeight(2));
     }
   }
 
@@ -279,7 +284,8 @@ public final class TrainNewsGroups {
     long date = (long) (1000 * (DATE_REFERENCE + actual * MONTH + 1 * WEEK * 
rand.nextDouble()));
     Multiset<String> words = ConcurrentHashMultiset.create();
 
-    BufferedReader reader = new BufferedReader(new FileReader(file));
+    BufferedReader reader =
+        new BufferedReader(new InputStreamReader(new FileInputStream(file), 
Charset.forName("UTF-8")));
     try {
       String line = reader.readLine();
       Reader dateString = new StringReader(DATE_FORMATS[leakType % 
3].format(new Date(date)));

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java
 Wed Jan 26 23:21:41 2011
@@ -19,8 +19,11 @@ package org.apache.mahout.clustering.dis
 
 import java.awt.Graphics;
 import java.awt.Graphics2D;
-import java.io.FileWriter;
-import java.io.PrintWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -54,16 +57,16 @@ class DisplaySpectralKMeans extends Disp
     if (!fs.exists(output)) {
       fs.mkdirs(output);
     }
-    FileWriter writer = new FileWriter(affinities.toString());
-    PrintWriter out = new PrintWriter(writer);
+    Writer writer = new OutputStreamWriter(
+        new FileOutputStream(new File(affinities.toString())), 
Charset.forName("UTF-8"));
     try {
       for (int i = 0; i < SAMPLE_DATA.size(); i++) {
         for (int j = 0; j < SAMPLE_DATA.size(); j++) {
-          out.println(i + "," + j + ',' + 
measure.distance(SAMPLE_DATA.get(i).get(), SAMPLE_DATA.get(j).get()));
+          writer.write(i + "," + j + ',' + 
measure.distance(SAMPLE_DATA.get(i).get(), SAMPLE_DATA.get(j).get()) + '\n');
         }
       }
     } finally {
-      out.close();
+      writer.close();
     }
     int maxIter = 10;
     double convergenceDelta = 0.001;

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java
 Wed Jan 26 23:21:41 2011
@@ -27,8 +27,11 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 import java.io.BufferedReader;
-import java.io.FileReader;
+import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -97,7 +100,8 @@ public final class LastfmDataConverter {
     Map<String, Integer> featureIdxMap = new HashMap<String, Integer>();
     Map<String, List<Integer>> itemFeaturesMap = new HashMap<String, 
List<Integer>>();
     String msg = usedMemory() + "Converting data to internal vector format: ";
-    BufferedReader br = new BufferedReader(new FileReader(inputFile));
+    BufferedReader br = new BufferedReader(
+        new InputStreamReader(new FileInputStream(new File(inputFile)), 
Charset.forName("UTF-8")));
     try {
       System.out.print(msg);
       int prevPercentDone = 1;
@@ -194,8 +198,7 @@ public final class LastfmDataConverter {
       return;
     }
     Lastfm dataSet = Lastfm.valueOf(args[2]);
-    Map<String, List<Integer>> itemFeatures = convertToItemFeatures(args[0],
-        dataSet);
+    Map<String, List<Integer>> itemFeatures = convertToItemFeatures(args[0], 
dataSet);
     if (itemFeatures.isEmpty()) {
       throw new IllegalStateException("Error converting the data file: [" + 
args[0] + ']');
     }

Modified: 
mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java
 (original)
+++ 
mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java
 Wed Jan 26 23:21:41 2011
@@ -17,9 +17,7 @@
 
 package org.apache.mahout.classifier.sgd;
 
-import com.google.common.base.CharMatcher;
 import com.google.common.base.Charsets;
-import com.google.common.base.Splitter;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Sets;
 import com.google.common.io.Resources;
@@ -30,23 +28,24 @@ import org.apache.mahout.math.Vector;
 import org.junit.Test;
 
 import java.io.ByteArrayOutputStream;
-import java.io.FileReader;
+import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.PrintStream;
+import java.io.Reader;
 import java.lang.reflect.Field;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
+import java.nio.charset.Charset;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
 public class TrainLogisticTest extends MahoutTestCase {
 
-  private static final Splitter ON_WHITE_SPACE = 
-      
Splitter.on(CharMatcher.BREAKING_WHITESPACE).trimResults().omitEmptyStrings();
-
   @Test
-  public void example13_1() throws IOException, NoSuchFieldException, 
IllegalAccessException, InvocationTargetException, NoSuchMethodException {
+  public void example13_1() throws Exception {
     String outputFile = getTestTempFile("model").getAbsolutePath();
 
     String trainOut = runMain(TrainLogistic.class, new String[]{
@@ -78,35 +77,57 @@ public class TrainLogisticTest extends M
     verifyModel(lmp, csv, data, model, expectedValues);
 
     // test saved model
-    LogisticModelParameters lmpOut = LogisticModelParameters.loadFrom(new 
FileReader(outputFile));
+    Reader in = new InputStreamReader(new FileInputStream(new 
File(outputFile)), Charset.forName("UTF-8"));
+    LogisticModelParameters lmpOut = LogisticModelParameters.loadFrom(in);
+    in.close();
     CsvRecordFactory csvOut = lmpOut.getCsvRecordFactory();
     csvOut.firstLine(data.get(0));
     OnlineLogisticRegression lrOut = lmpOut.createRegression();
     verifyModel(lmpOut, csvOut, data, lrOut, expectedValues);
 
-    String output = runMain(RunLogistic.class, new String[]{"--input", 
"donut.csv", "--model", outputFile, "--auc", "--confusion"});
+    String output = runMain(RunLogistic.class, new String[]{
+        "--input", "donut.csv",
+        "--model", outputFile,
+        "--auc",
+        "--confusion"
+    });
     assertTrue(output.contains("AUC = 0.57"));
     assertTrue(output.contains("confusion: [[27.0, 13.0], [0.0, 0.0]]"));
   }
 
   @Test
-  public void example13_2() throws InvocationTargetException, IOException, 
NoSuchMethodException, NoSuchFieldException, IllegalAccessException {
+  public void example13_2() throws Exception {
     String outputFile = getTestTempFile("model").getAbsolutePath();
     String trainOut = runMain(TrainLogistic.class, new String[]{
-      "--input", "donut.csv", "--output", outputFile,
-      "--target", "color", "--categories", "2",
-      "--predictors", "x", "y", "a", "b", "c", "--types", "numeric",
-      "--features", "20", "--passes", "100", "--rate", "50"
+        "--input", "donut.csv",
+        "--output", outputFile,
+        "--target", "color",
+        "--categories", "2",
+        "--predictors", "x", "y", "a", "b", "c",
+        "--types", "numeric",
+        "--features", "20",
+        "--passes", "100",
+        "--rate", "50"
     });
 
     assertTrue(trainOut.contains("a 0."));
     assertTrue(trainOut.contains("b -1."));
     assertTrue(trainOut.contains("c -25."));
 
-    String output = runMain(RunLogistic.class, new String[]{"--input", 
"donut.csv", "--model", outputFile, "--auc", "--confusion"});
+    String output = runMain(RunLogistic.class, new String[]{
+        "--input", "donut.csv",
+        "--model", outputFile,
+        "--auc",
+        "--confusion"
+    });
     assertTrue(output.contains("AUC = 1.00"));
 
-    String heldout = runMain(RunLogistic.class, new String[]{"--input", 
"donut-test.csv", "--model", outputFile, "--auc", "--confusion"});
+    String heldout = runMain(RunLogistic.class, new String[]{
+        "--input", "donut-test.csv",
+        "--model", outputFile,
+        "--auc",
+        "--confusion"
+    });
     assertTrue(heldout.contains("AUC = 0.9"));
   }
 
@@ -124,7 +145,8 @@ public class TrainLogisticTest extends M
    * @throws NoSuchMethodException         If there isn't a main method.
    * @throws InvocationTargetException     If the main method throws an 
exception.
    */
-  private String runMain(Class clazz, String[] args) throws IOException, 
NoSuchFieldException, IllegalAccessException, NoSuchMethodException, 
InvocationTargetException {
+  private static String runMain(Class<?> clazz, String[] args)
+    throws NoSuchFieldException, IllegalAccessException, 
NoSuchMethodException, InvocationTargetException {
     ByteArrayOutputStream trainOutput = new ByteArrayOutputStream();
     PrintStream printStream = new PrintStream(trainOutput);
 
@@ -139,7 +161,11 @@ public class TrainLogisticTest extends M
     return new String(trainOutput.toByteArray(), Charsets.UTF_8);
   }
 
-  private void verifyModel(LogisticModelParameters lmp, CsvRecordFactory csv, 
List<String> data, AbstractVectorClassifier model, Map<String, Double> 
expectedValues) {
+  private static void verifyModel(LogisticModelParameters lmp,
+                                  RecordFactory csv,
+                                  List<String> data,
+                                  AbstractVectorClassifier model,
+                                  Map<String, Double> expectedValues) {
     ModelDissector md = new ModelDissector();
     for (String line : data.subList(1, data.size())) {
       Vector v = new DenseVector(lmp.getNumFeatures());

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
 Wed Jan 26 23:21:41 2011
@@ -18,9 +18,11 @@
 package org.apache.mahout.clustering.lda;
 
 import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.PrintWriter;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -233,14 +235,17 @@ public final class LDAPrintTopics {
   private static void writeTopWords(List<List<String>> topWords, File output) 
throws IOException {
     for (int i = 0; i < topWords.size(); ++i) {
       List<String> topK = topWords.get(i);
-      File out = new File(output, "topic-" + i);
-      PrintWriter writer = new PrintWriter(new FileWriter(out));
-      writer.println("Topic " + i);
-      writer.println("===========");
-      for (String word : topK) {
-        writer.println(word);
+      Writer writer = new OutputStreamWriter(
+          new FileOutputStream(new File(output, "topic-" + i)), 
Charset.forName("UTF-8"));
+      try {
+        writer.write("Topic " + i + '\n');
+        writer.write("===========\n");
+        for (String word : topK) {
+          writer.write(word + '\n');
+        }
+      } finally {
+        writer.close();
       }
-      writer.close();
     }
   }
   

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
 Wed Jan 26 23:21:41 2011
@@ -17,10 +17,12 @@
 
 package org.apache.mahout.utils;
 
-import java.io.FileWriter;
+import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.nio.charset.Charset;
 
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
@@ -86,7 +88,8 @@ public final class SequenceFileDumper {
         
         Writer writer;
         if (cmdLine.hasOption(outputOpt)) {
-          writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
+          writer = new OutputStreamWriter(
+              new FileOutputStream(new 
File(cmdLine.getValue(outputOpt).toString())), Charset.forName("UTF-8"));
         } else {
           writer = new OutputStreamWriter(System.out);
         }

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
 Wed Jan 26 23:21:41 2011
@@ -18,10 +18,11 @@
 package org.apache.mahout.utils.clustering;
 
 import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -158,7 +159,12 @@ public final class ClusterDumper extends
       }
     }
 
-    Writer writer = this.outputFile == null ? new 
OutputStreamWriter(System.out) : new FileWriter(this.outputFile);
+    Writer writer;
+    if (this.outputFile == null) {
+      writer = new OutputStreamWriter(System.out);
+    } else {
+      writer = new OutputStreamWriter(new FileOutputStream(new 
File(this.outputFile)), Charset.forName("UTF-8"));
+    }
     try {
       FileSystem fs = seqFileDir.getFileSystem(conf);
       for (FileStatus seqFile : fs.globStatus(new Path(seqFileDir, "part-*"))) 
{

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
 Wed Jan 26 23:21:41 2011
@@ -38,9 +38,10 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.nio.charset.Charset;
 
 /**
  * Can read in a {@link SequenceFile} of {@link Vector}s and dump
@@ -129,9 +130,13 @@ public final class VectorDumper {
         Writable valueWritable = reader.getValueClass().asSubclass(Writable.class).newInstance();
         boolean transposeKeyValue = cmdLine.hasOption(vectorAsKeyOpt);
         try {
-          Writer writer = cmdLine.hasOption(outputOpt)
-                  ? new FileWriter(cmdLine.getValue(outputOpt).toString())
-                  : new OutputStreamWriter(System.out);
+          Writer writer;
+          if (cmdLine.hasOption(outputOpt)) {
+            writer = new OutputStreamWriter(
+                new FileOutputStream(new File(cmdLine.getValue(outputOpt).toString())), Charset.forName("UTF-8"));
+          } else {
+            writer = new OutputStreamWriter(System.out);
+          }
           try {
             boolean printKey = cmdLine.hasOption(printKeyOpt);
             long i = 0;

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
 Wed Jan 26 23:21:41 2011
@@ -20,7 +20,6 @@ package org.apache.mahout.utils.vectors.
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
@@ -61,7 +60,7 @@ public class ARFFVectorIterable implemen
   private final ARFFModel model;
   
   public ARFFVectorIterable(File file, ARFFModel model) throws IOException {
-    this(new FileReader(file), model);
+    this(file, Charset.forName("UTF-8"), model);
   }
   
   public ARFFVectorIterable(File file, Charset encoding, ARFFModel model) throws IOException {

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
 Wed Jan 26 23:21:41 2011
@@ -20,7 +20,6 @@ package org.apache.mahout.utils.vectors.
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileOutputStream;
-import java.io.FileWriter;
 import java.io.FilenameFilter;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
@@ -173,7 +172,8 @@ public final class Driver {
       vectorWriter = getSeqFileWriter(outFile);
     } else {
       if ("file".equals(outWriter)) {
-        vectorWriter = new JWriterVectorWriter(new BufferedWriter(new FileWriter(outFile)));
+        vectorWriter = new JWriterVectorWriter(
+            new OutputStreamWriter(new FileOutputStream(new File(outFile)), Charset.forName("UTF-8")));
       } else {
         vectorWriter = getSeqFileWriter(outFile);
       }

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
 Wed Jan 26 23:21:41 2011
@@ -18,10 +18,11 @@
 package org.apache.mahout.utils.vectors.lucene;
 
 import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.nio.charset.Charset;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
@@ -144,7 +145,12 @@ public class ClusterLabels {
 
   public void getLabels() throws IOException {
 
-    Writer writer = this.output == null ? new OutputStreamWriter(System.out) : new FileWriter(this.output);
+    Writer writer;
+    if (this.output == null) {
+      writer = new OutputStreamWriter(System.out);
+    } else {
+      writer = new OutputStreamWriter(new FileOutputStream(new File(this.output)), Charset.forName("UTF-8"));
+    }
     try {
       for (Map.Entry<Integer, List<WeightedVectorWritable>> integerListEntry : clusterIdToPoints.entrySet()) {
         List<WeightedVectorWritable> wvws = integerListEntry.getValue();

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java?rev=1063916&r1=1063915&r2=1063916&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
 Wed Jan 26 23:21:41 2011
@@ -17,12 +17,11 @@
 
 package org.apache.mahout.utils.vectors.lucene;
 
-import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileOutputStream;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
+import java.io.Writer;
 import java.nio.charset.Charset;
 
 import org.apache.commons.cli2.CommandLine;
@@ -214,7 +213,7 @@ public final class Driver {
         if (cmdLine.hasOption(outWriterOpt)) {
           String outWriter = cmdLine.getValue(outWriterOpt).toString();
           if ("file".equals(outWriter)) {
-            BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
+            Writer writer = new OutputStreamWriter(new FileOutputStream(new File(outFile)), Charset.forName("UTF8"));
             vectorWriter = new JWriterVectorWriter(writer);
           } else {
             vectorWriter = getSeqFileWriter(outFile);
@@ -231,8 +230,7 @@ public final class Driver {
         
         File dictOutFile = new File(cmdLine.getValue(dictOutOpt).toString());
         log.info("Dictionary Output file: {}", dictOutFile);
-        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
-            new FileOutputStream(dictOutFile), Charset.forName("UTF8")));
+        Writer writer = new OutputStreamWriter(new FileOutputStream(dictOutFile), Charset.forName("UTF8"));
         JWriterTermInfoWriter tiWriter = new JWriterTermInfoWriter(writer, delimiter, field);
         tiWriter.write(termInfo);
         tiWriter.close();


Reply via email to