/**
 * A filter that stems Dutch words.
 *
 * @author    Maurits van Wijland
 */
package org.apache.lucene.analysis.nl;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import java.io.IOException;
import java.util.Hashtable;

/**
 * Token filter that passes the text of each token from the wrapped stream
 * through a {@code DutchStemmer} and emits the stemmed form.
 * <p>
 * Tokens whose text is unchanged by the stemmer are passed through as-is.
 */
public final class DutchStemFilter extends TokenFilter {

	/**
	 * The token most recently read from the input stream.
	 */
	private Token token = null;

	/**
	 * Stemmer applied to the text of every token that is not excluded.
	 */
	private DutchStemmer stemmer = null;

	/**
	 * Optional table of terms that must not be stemmed. Nothing in this
	 * class assigns it, so it stays null and the exclusion check in
	 * {@link #next()} is currently skipped.
	 */
	private Hashtable exclusions = null;

	/**
	 * Creates a filter that stems the tokens produced by the given stream.
	 *
	 * @param in  the token stream to read from
	 */
	public DutchStemFilter( TokenStream in ) {
		stemmer = new DutchStemmer();
		input = in;
	}


	/**
	 * Reads the next token from the input and returns it with its text
	 * stemmed. When stemming changes the text, a new token is created that
	 * keeps the start offset, end offset and type of the original token, so
	 * that it still points at the original position in the source text.
	 *
	 * @return  Returns the next token in the stream, or null at EOS.
	 * @throws IOException  if reading from the input stream fails
	 */
	public final Token next()
		throws IOException {
		if ( ( token = input.next() ) == null ) {
			return null;
		}
		// Check the exclusion table.
		// NOTE(review): Hashtable.contains() tests the table's VALUES, not
		// its keys. This is only correct if the table maps each word to
		// itself - confirm once something actually populates 'exclusions'.
		else if ( exclusions != null && exclusions.contains( token.termText() ) ) {
			return token;
		}
		else {
			String s = stemmer.stem( token.termText() );
			// If not stemmed, don't waste the time creating a new token.
			if ( !s.equals( token.termText() ) ) {
				// Keep the original offsets: the stemmed text is usually
				// shorter, but the token still covers the same span of the
				// source text.
				return new Token( s, token.startOffset(), token.endOffset(), token.type() );
			}
			return token;
		}
	}
}


