Author: srowen
Date: Tue Feb 22 21:17:35 2011
New Revision: 1073511
URL: http://svn.apache.org/viewvc?rev=1073511&view=rev
Log:
MAHOUT-614 fix up overriding of Hadoop's FileOutputFormat
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java?rev=1073511&r1=1073510&r2=1073511&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java Tue Feb 22 21:17:35 2011
@@ -22,12 +22,10 @@ import java.io.IOException;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Progressable;
/**
* This abstract class extends the FileOutputFormat, allowing to write the
@@ -52,23 +50,10 @@ public abstract class MultipleOutputForm
/**
* Create a composite record writer that can write key/value data to different
* output files
- *
- * @param fs
- * the file system to use
- * @param job
- * the job conf for the job
- * @param name
- * the leaf file name for the output file (such as part-00000")
- * @param arg3
- * a progressable for reporting progress.
* @return a composite record writer
*/
- public RecordWriter<K, V> getRecordWriter(FileSystem fs, Configuration job, String name, Progressable arg3) {
-
- final FileSystem myFS = fs;
- final String myName = generateLeafFileName(name);
- final Configuration myJob = job;
- final Progressable myProgressable = arg3;
+ @Override
+ public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) {
return new RecordWriter<K, V>() {
@@ -76,13 +61,13 @@ public abstract class MultipleOutputForm
private final TreeMap<String, RecordWriter<K, V>> recordWriters = new TreeMap<String, RecordWriter<K, V>>();
@Override
- public void write(K key, V value) throws IOException {
+ public void write(K key, V value) throws IOException, InterruptedException {
// get the file name based on the key
- String keyBasedPath = generateFileNameForKeyValue(key, value, myName);
+ String keyBasedPath = generateFileNameForKeyValue(key, value, generateLeafFileName(null));
// get the file name based on the input file name
- String finalPath = getInputFileBasedOutputFileName(myJob, keyBasedPath);
+ String finalPath = getInputFileBasedOutputFileName(context.getConfiguration(), keyBasedPath);
// get the actual key
K actualKey = generateActualKey(key, value);
@@ -93,7 +78,7 @@ public abstract class MultipleOutputForm
// if we don't have the record writer yet for the final path, create
// one
// and add it to the cache
- rw = getBaseRecordWriter(myFS, myJob, finalPath, myProgressable);
+ rw = getBaseRecordWriter(context.getConfiguration());
this.recordWriters.put(finalPath, rw);
}
try {
@@ -177,19 +162,19 @@ public abstract class MultipleOutputForm
* N trailing legs of the input file name where N is the config value for
* "num.of.trailing.legs.to.use".
*
- * @param job
+ * @param conf
* the job config
* @param name
* the output file name
* @return the outfile name based on a given anme and the input file name.
*/
- protected String getInputFileBasedOutputFileName(Configuration job, String name) {
- String infilepath = job.get("map.input.file");
+ protected String getInputFileBasedOutputFileName(Configuration conf, String name) {
+ String infilepath = conf.get("map.input.file");
if (infilepath == null) {
// if the map input file does not exists, then return the given name
return name;
}
- int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
+ int numOfTrailingLegsToUse = conf.getInt("mapred.outputformat.numOfTrailingLegs", 0);
if (numOfTrailingLegsToUse <= 0) {
return name;
}
@@ -212,21 +197,8 @@ public abstract class MultipleOutputForm
}
/**
- *
- * @param fs
- * the file system to use
- * @param job
- * a job conf object
- * @param name
- * the name of the file over which a record writer object will be
- * constructed
- * @param arg3
- * a progressable object
* @return A RecordWriter object over the given file
- * @throws IOException
*/
- protected abstract RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
- Configuration job,
- String name,
- Progressable arg3) throws IOException;
+ protected abstract RecordWriter<K, V> getBaseRecordWriter(Configuration conf)
+ throws IOException, InterruptedException;
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java?rev=1073511&r1=1073510&r2=1073511&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java Tue Feb 22 21:17:35 2011
@@ -20,12 +20,10 @@ package org.apache.mahout.classifier.bay
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.Progressable;
/**
* This class extends the MultipleOutputFormat, allowing to write the output
@@ -36,24 +34,11 @@ public class MultipleTextOutputFormat<K,
private TextOutputFormat<K, V> theTextOutputFormat;
@Override
- protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs, Configuration conf, String name, Progressable arg3)
- throws IOException {
+ protected RecordWriter<K, V> getBaseRecordWriter(Configuration conf) throws IOException, InterruptedException {
if (theTextOutputFormat == null) {
theTextOutputFormat = new TextOutputFormat<K, V>();
}
- try {
- return theTextOutputFormat.getRecordWriter(new TaskAttemptContext(conf, new TaskAttemptID()));
- } catch (InterruptedException e) {
- // continue
- }
- return null;
+ return theTextOutputFormat.getRecordWriter(new TaskAttemptContext(conf, new TaskAttemptID()));
}
- @Override
- public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
- if (theTextOutputFormat == null) {
- theTextOutputFormat = new TextOutputFormat<K, V>();
- }
- return theTextOutputFormat.getRecordWriter(job);
- }
}