lewismc commented on code in PR #905:
URL: https://github.com/apache/nutch/pull/905#discussion_r2907415018
##########
src/java/org/apache/nutch/indexer/IndexerOutputFormat.java:
##########
@@ -40,32 +33,67 @@ public RecordWriter<Text, NutchIndexAction> getRecordWriter(
Configuration conf = context.getConfiguration();
final IndexWriters writers = IndexWriters.get(conf);
- String name = getUniqueFile(context, "part", "");
- writers.open(conf, name);
+ // open writers (no temporary file output anymore)
+ writers.open(conf, "index");
LOG.info(writers.describe());
return new RecordWriter<Text, NutchIndexAction>() {
@Override
public void close(TaskAttemptContext context) throws IOException {
- // do the commits once and for all the reducers in one go
- boolean noCommit = conf
- .getBoolean(IndexerMapReduce.INDEXER_NO_COMMIT, false);
+
+ boolean noCommit =
+ conf.getBoolean(IndexerMapReduce.INDEXER_NO_COMMIT, false);
+
if (!noCommit) {
writers.commit();
}
+
writers.close();
}
@Override
public void write(Text key, NutchIndexAction indexAction)
throws IOException {
+
if (indexAction.action == NutchIndexAction.ADD) {
writers.write(indexAction.doc);
+
} else if (indexAction.action == NutchIndexAction.DELETE) {
writers.delete(key.toString());
}
}
};
}
-}
+
+ @Override
+ public void checkOutputSpecs(JobContext context)
+ throws IOException, InterruptedException {
+ // No output specs required since we don't write files
+ }
+
+ @Override
+ public OutputCommitter getOutputCommitter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+
+ return new OutputCommitter() {
+
+ @Override
+ public void setupJob(JobContext jobContext) {}
+
+ @Override
+ public void setupTask(TaskAttemptContext taskContext) {}
+
+ @Override
+ public boolean needsTaskCommit(TaskAttemptContext taskContext) {
+ return false;
+ }
+
+ @Override
+ public void commitTask(TaskAttemptContext taskContext) {}
+
+ @Override
+ public void abortTask(TaskAttemptContext taskContext) {}
+ };
Review Comment:
@shishir-kuet can you address this issue? Thank you.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]