Hello, for a project I created a new class that extends TokenFilter, but I have a big problem. I have a dictionary of terms loaded into a HashSet, but some of these terms consist of more than one token (for example, "computer science"). As a result, the filter only matches single-word terms (I tested this by adding a made-up single-word term, and it works), but it doesn't find terms with more than one word. Can someone help me, please? It's urgent. I'm posting my code below. Thanks to everyone!
import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.HashSet; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; public class TermTokenFilter extends TokenFilter { protected HashSet terms = new HashSet(); public TermTokenFilter(TokenStream input) throws IOException { super(input); //apro i file //ricontrollare se input o cosa va messo. File f = new File("ADDRESS OF FILE TXT WITH TERMS\\TERMS.txt"); FileInputStream fip = new FileInputStream(f); BufferedReader d = new BufferedReader(new InputStreamReader(fip)); String readLine; while((readLine = d.readLine())!=null) { terms.add(readLine); } } public Token next(Token result) throws IOException { while((result = input.next(result)) != null) { if (terms.contains(new String(result.termBuffer(), 0, result.termLength()))) { return result; } } return null; } } -- View this message in context: http://www.nabble.com/Using-more-tokens-in-TokenFilter-%3A%28-tp17238836p17238836.html Sent from the Lucene - Java Users mailing list archive at Nabble.com.