Hi,
The main process for MAHOUT-696 is as follows, but it always causes a
NullPointerException after the first call to getBest(). Can we continue
training an AdaptiveLogisticRegression after using getBest() to score some
new lines, just as TrainLogistic does?
// Training driver fragment: makes `passes` passes over the CSV input, trains the
// AdaptiveLogisticRegression on every record and, when scoring is enabled, scores
// every (skipscorenum + 1)-th record with the best learner obtained so far.
double logPEstimate = 0;
int k = 0; // number of records trained on, accumulated across all passes

CsvRecordFactory csv = lmp.getCsvRecordFactory();
model = lmp.createAdaptiveLogisticRegression();
State<Wrapper, CrossFoldLearner> best = null;
CrossFoldLearner learner = null;

for (int pass = 0; pass < passes; pass++) {
  BufferedReader in = open(inputFile);
  try {
    // read variable names
    csv.firstLine(in.readLine());

    String line = in.readLine();
    while (line != null) {
      // for each new line, get target and predictors
      Vector input = new RandomAccessSparseVector(lmp.getNumFeatures());
      int targetValue = csv.processLine(line, input);

      // update model
      model.train(targetValue, input);
      k++;

      if (scores && (k % (skipscorenum + 1) == 0)) {
        best = model.getBest();
        // getBest() can return null; in that case the learner from an earlier
        // successful call (if any) is kept and reused below.
        if (null != best) {
          learner = best.getPayload().getLearner();
        }
        if (learner != null) {
          // check performance while this is still news
          double logP = learner.logLikelihood(targetValue, input);
          if (!Double.isInfinite(logP)) {
            // NOTE(review): k counts every trained record, not only the scored
            // ones, so with a large skipscorenum the warm-up branch may never
            // run -- confirm this is the intended averaging.
            if (k < 20) {
              logPEstimate = (k * logPEstimate + logP) / (k + 1);
            } else {
              logPEstimate = 0.95 * logPEstimate + 0.05 * logP;
            }
          }
          double p = learner.classifyScalar(input);
          output.printf(Locale.ENGLISH,
              "%10d %2d %10.2f %2.4f %10.4f %10.4f\n",
              k, targetValue, learner.percentCorrect(), p, logP, logPEstimate);
        } else {
          output.printf(Locale.ENGLISH,
              "%10d %2d %s\n", k, targetValue,
              "AdaptiveLogisticRegression is not ready for scoring ... ");
        }
      }
      line = in.readLine();
    }
  } finally {
    // Close the reader even when parsing, training or scoring throws, so a
    // failing pass does not leak the underlying file handle.
    in.close();
  }
}
100 1 AdaptiveLogisticRegression is not ready for scoring ...
200 0 AdaptiveLogisticRegression is not ready for scoring ...
300 1 AdaptiveLogisticRegression is not ready for scoring ...
400 0 AdaptiveLogisticRegression is not ready for scoring ...
500 1 AdaptiveLogisticRegression is not ready for scoring ...
Exception in thread "main" java.lang.IllegalStateException: java.lang.NullPointerException
at org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression.trainWithBufferedExamples(AdaptiveLogisticRegression.java:144)
at org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression.train(AdaptiveLogisticRegression.java:117)
at org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression.train(AdaptiveLogisticRegression.java:103)
at org.apache.mahout.classifier.sgd.TrainAdaptiveLogistic.main(TrainAdaptiveLogistic.java:72)
Caused by: java.lang.NullPointerException
at org.apache.mahout.classifier.sgd.CrossFoldLearner.train(CrossFoldLearner.java:134)
at org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression$Wrapper.train(AdaptiveLogisticRegression.java:411)
at org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression$1.apply(AdaptiveLogisticRegression.java:128)
at org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression$1.apply(AdaptiveLogisticRegression.java:1)
at org.apache.mahout.ep.EvolutionaryProcess$1.call(EvolutionaryProcess.java:146)
at org.apache.mahout.ep.EvolutionaryProcess$1.call(EvolutionaryProcess.java:1)
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303)
at java.util.concurrent.FutureTask.run(FutureTask.java:138)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)