OK, here is what I have so far: a new class called TokenNameFinderEvaluatorMultiple (what a mouthful!).
The class is attached...

Basically, it maintains 2 name finders (I only need 2 at the moment) and merges their results just before they are sent to the evaluator. I wasn't sure, however, whether I'm supposed to keep the duplicates when merging the Span arrays. Also, does the order matter, or can I use a set to remove duplicates if needed? That will mess up the order, though!

Jim

P.S. I'm not sure how to quickly test it without potentially messing up my project. Any chance you could test it? I don't see any reason why it shouldn't work (assuming duplicates are to be kept)...
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package opennlp.tools.namefind;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;

import opennlp.tools.cmdline.PerformanceMonitor;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.eval.Evaluator;
import opennlp.tools.util.eval.FMeasure;

/**
 * The {@link TokenNameFinderEvaluator} measures the performance
 * of the given {@link TokenNameFinder} with the provided
 * reference {@link NameSample}s.
 *
 * @see Evaluator
 * @see TokenNameFinder
 * @see NameSample
 */
public class TokenNameFinderEvaluatorMultiple extends Evaluator<NameSample> {

  private FMeasure fmeasure = new FMeasure();
  
  /**
   * The {@link TokenNameFinder} used to create the predicted
   * {@link NameSample} objects.
   */
  private TokenNameFinder nameFinder1; //e.g. maxent
  private TokenNameFinder nameFinder2; //e.g. dictionary
  
  /**
   * Initializes the current instance with the given
   * {@link TokenNameFinder}.
   *
   * @param nameFinder the {@link TokenNameFinder} to evaluate.
   * @param listeners evaluation sample listeners 
   */
  public TokenNameFinderEvaluator(TokenNameFinder ... nameFinder, TokenNameFinderEvaluationMonitor ... listeners) {
    super(listeners);
    this.nameFinder1 = nameFinder[0];
    this.nameFinder2 = nameFinder[1];//we can have more nameFinders
  }

  /**
   * Evaluates the given reference {@link NameSample} object.
   *
   * This is done by finding the names with the
   * {@link TokenNameFinder} in the sentence from the reference
   * {@link NameSample}. The found names are then used to
   * calculate and update the scores.
   *
   * @param reference the reference {@link NameSample}.
   * 
   * @return the predicted {@link NameSample}.
   */
  @Override
  protected NameSample processSample(NameSample reference) {
    
    if (reference.isClearAdaptiveDataSet()) {
      nameFinder1.clearAdaptiveData();//clearAdaptiveData for all namefinders
      nameFinder1.clearAdaptiveData();
    }
    
    Span[] predictedNames1 = nameFinder1.find(reference.getSentence()); // predict 1st
    Span[] predictedNames2 = nameFinder2.find(reference.getSentence()); // predict 2nd
    Span[] predictedTotal = mergeSpans(predictedNames1, predictedNames2); //merge predictions - not sure whether to keep or remove duplicates
    Span[] references = reference.getNames();
    
  //helper function for array concat
   private Span[] mergeSpans(Span[] x, Span[] y){
    
   Span[] temp= new Span[x.length + y.length];
   System.arraycopy(x, 0, temp, 0, x.length);
   System.arraycopy(B, 0, temp, x.length, y.length);

   return temp;
 
    }

    // OPENNLP-396 When evaluating with a file in the old format
    // the type of the span is null, but must be set to default to match
    // the output of the name finder.
    for (int i = 0; i < references.length; i++) {
      if (references[i].getType() == null) {
        references[i] = new Span(references[i].getStart(), references[i].getEnd(), "default");
      }
    }
    
    fmeasure.updateScores(references, predictedTotal);//use predicted total here
    
    return new NameSample(reference.getSentence(), predictedTotal, reference.isClearAdaptiveDataSet());//and here
  }
  
  public FMeasure getFMeasure() {
    return fmeasure;
  }
// all changes are above this line -----------------------------------------------------------------------------------  
  @Deprecated
  public static void main(String[] args) throws IOException, 
      InvalidFormatException {
    
    if (args.length == 4) {
      
      System.out.println("Loading name finder model ...");
      InputStream modelIn = new FileInputStream(args[3]);
      
      TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
      
      TokenNameFinder nameFinder = new NameFinderME(model);
      
      System.out.println("Performing evaluation ...");
      TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(nameFinder);
      
      final NameSampleDataStream sampleStream = new NameSampleDataStream(
          new PlainTextByLineStream(new InputStreamReader(new FileInputStream(args[2]), args[1])));
      
      final PerformanceMonitor monitor = new PerformanceMonitor("sent");
      
      monitor.startAndPrintThroughput();
      
      ObjectStream<NameSample> iterator = new ObjectStream<NameSample>() {

        public NameSample read() throws IOException {
          monitor.incrementCounter();
          return sampleStream.read();
        }
        
        public void reset() throws IOException {
          sampleStream.reset();
        }
        
        public void close() throws IOException {
          sampleStream.close();
        }
      };
      
      evaluator.evaluate(iterator);
      
      monitor.stopAndPrintFinalResult();
      
      System.out.println();
      System.out.println("F-Measure: " + evaluator.getFMeasure().getFMeasure());
      System.out.println("Recall: " + evaluator.getFMeasure().getRecallScore());
      System.out.println("Precision: " + evaluator.getFMeasure().getPrecisionScore());
    }
    else {
      // usage: -encoding code test.file model.file
    }
  }
}

Reply via email to