ehatcher 2004/03/12 07:52:59
Modified: contributions/analyzers/src/java/org/apache/lucene/analysis/br
BrazilianAnalyzer.java BrazilianStemFilter.java
contributions/analyzers/src/java/org/apache/lucene/analysis/cjk
CJKAnalyzer.java
contributions/analyzers/src/java/org/apache/lucene/analysis/cz
CzechAnalyzer.java
contributions/analyzers/src/java/org/apache/lucene/analysis/fr
FrenchAnalyzer.java FrenchStemFilter.java
contributions/analyzers/src/java/org/apache/lucene/analysis/nl
DutchAnalyzer.java DutchStemFilter.java
DutchStemmer.java
Log:
clean-up based on core changes to StopFilter
Revision Changes Path
1.5 +3 -2
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
Index: BrazilianAnalyzer.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- BrazilianAnalyzer.java 11 Mar 2004 03:05:36 -0000 1.4
+++ BrazilianAnalyzer.java 12 Mar 2004 15:52:58 -0000 1.5
@@ -65,6 +65,7 @@
import java.io.Reader;
import java.util.Hashtable;
import java.util.HashSet;
+import java.util.Set;
/**
* Analyzer for brazilian language. Supports an external list of stopwords (words
that
@@ -103,11 +104,11 @@
/**
* Contains the stopwords used with the StopFilter.
*/
- private HashSet stoptable = new HashSet();
+ private Set stoptable = new HashSet();
/**
* Contains words that should be indexed but not stemmed.
*/
- private HashSet excltable = new HashSet();
+ private Set excltable = new HashSet();
/**
* Builds an analyzer.
1.6 +52 -53
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
Index: BrazilianStemFilter.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- BrazilianStemFilter.java 11 Mar 2004 03:05:36 -0000 1.5
+++ BrazilianStemFilter.java 12 Mar 2004 15:52:58 -0000 1.6
@@ -57,72 +57,71 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+
import java.io.IOException;
-import java.util.Hashtable;
import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Set;
/**
* Based on (copied) the GermanStemFilter
*
- *
- * @author João Kramer
- *
- *
- * A filter that stemms german words. It supports a table of words that should
- * not be stemmed at all.
- *
- * @author Gerhard Schwarz
+ * @author João Kramer
+ * <p/>
+ * <p/>
+ * A filter that stemms german words. It supports a table of words that
should
+ * not be stemmed at all.
+ * @author Gerhard Schwarz
*/
public final class BrazilianStemFilter extends TokenFilter {
- /**
- * The actual token in the input stream.
- */
- private Token token = null;
- private BrazilianStemmer stemmer = null;
- private HashSet exclusions = null;
+ /**
+ * The actual token in the input stream.
+ */
+ private Token token = null;
+ private BrazilianStemmer stemmer = null;
+ private Set exclusions = null;
- public BrazilianStemFilter( TokenStream in ) {
+ public BrazilianStemFilter(TokenStream in) {
super(in);
- stemmer = new BrazilianStemmer();
- }
+ stemmer = new BrazilianStemmer();
+ }
- /**
- * Builds a BrazilianStemFilter that uses an exclusiontable.
- *
+ /**
+ * Builds a BrazilianStemFilter that uses an exclusiontable.
+ *
* @deprecated
- */
- public BrazilianStemFilter( TokenStream in, Hashtable exclusiontable ) {
- this( in );
- this.exclusions = new HashSet(exclusiontable.keySet());
- }
-
- public BrazilianStemFilter( TokenStream in, HashSet exclusiontable ) {
- this( in );
- this.exclusions = exclusiontable;
- }
-
- /**
- * @return Returns the next token in the stream, or null at EOS.
- */
- public final Token next()
- throws IOException {
- if ( ( token = input.next() ) == null ) {
- return null;
- }
- // Check the exclusiontable.
- else if ( exclusions != null && exclusions.contains( token.termText()
) ) {
- return token;
- }
- else {
- String s = stemmer.stem( token.termText() );
- // If not stemmed, dont waste the time creating a new token.
- if ( (s != null) && !s.equals( token.termText() ) ) {
- return new Token( s, 0, s.length(), token.type() );
- }
- return token;
- }
- }
+ */
+ public BrazilianStemFilter(TokenStream in, Hashtable exclusiontable) {
+ this(in);
+ this.exclusions = new HashSet(exclusiontable.keySet());
+ }
+
+ public BrazilianStemFilter(TokenStream in, Set exclusiontable) {
+ this(in);
+ this.exclusions = exclusiontable;
+ }
+
+ /**
+ * @return Returns the next token in the stream, or null at EOS.
+ */
+ public final Token next()
+ throws IOException {
+ if ((token = input.next()) == null) {
+ return null;
+ }
+ // Check the exclusiontable.
+ else if (exclusions != null && exclusions.contains(token.termText())) {
+ return token;
+ } else {
+ String s = stemmer.stem(token.termText());
+ // If not stemmed, dont waste the time creating a new token.
+ if ((s != null) && !s.equals(token.termText())) {
+ return new Token(s, 0, s.length(), token.type());
+ }
+ return token;
+ }
+ }
}
1.4 +55 -56
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
Index: CJKAnalyzer.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- CJKAnalyzer.java 11 Mar 2004 03:05:36 -0000 1.3
+++ CJKAnalyzer.java 12 Mar 2004 15:52:58 -0000 1.4
@@ -61,9 +61,7 @@
import org.apache.lucene.analysis.TokenStream;
import java.io.Reader;
-
-import java.util.Hashtable;
-import java.util.HashSet;
+import java.util.Set;
/**
@@ -72,57 +70,58 @@
* @author Che, Dong
*/
public class CJKAnalyzer extends Analyzer {
- //~ Static fields/initializers ---------------------------------------------
+ //~ Static fields/initializers ---------------------------------------------
- /**
- * An array containing some common English words that are not usually
- * useful for searching. and some double-byte interpunctions.....
- */
- private static String[] stopWords = {
- "a", "and", "are", "as", "at", "be",
- "but", "by", "for", "if", "in",
- "into", "is", "it", "no", "not",
- "of", "on", "or", "s", "such", "t",
- "that", "the", "their", "then",
- "there", "these", "they", "this",
- "to", "was", "will", "with", "",
- "www"
- };
-
- //~ Instance fields --------------------------------------------------------
-
- /** stop word list */
- private HashSet stopTable;
-
- //~ Constructors -----------------------------------------------------------
-
- /**
- * Builds an analyzer which removes words in STOP_WORDS.
- */
- public CJKAnalyzer() {
- stopTable = StopFilter.makeStopSet(stopWords);
- }
-
- /**
- * Builds an analyzer which removes words in the provided array.
- *
- * @param stopWords stop word array
- */
- public CJKAnalyzer(String[] stopWords) {
- stopTable = StopFilter.makeStopSet(stopWords);
- }
-
- //~ Methods ----------------------------------------------------------------
-
- /**
- * get token stream from input
- *
- * @param fieldName lucene field name
- * @param reader input reader
- *
- * @return TokenStream
- */
- public final TokenStream tokenStream(String fieldName, Reader reader) {
- return new StopFilter(new CJKTokenizer(reader), stopTable);
- }
+ /**
+ * An array containing some common English words that are not usually
+ * useful for searching. and some double-byte interpunctions.....
+ */
+ private static String[] stopWords = {
+ "a", "and", "are", "as", "at", "be",
+ "but", "by", "for", "if", "in",
+ "into", "is", "it", "no", "not",
+ "of", "on", "or", "s", "such", "t",
+ "that", "the", "their", "then",
+ "there", "these", "they", "this",
+ "to", "was", "will", "with", "",
+ "www"
+ };
+
+ //~ Instance fields --------------------------------------------------------
+
+ /**
+ * stop word list
+ */
+ private Set stopTable;
+
+ //~ Constructors -----------------------------------------------------------
+
+ /**
+ * Builds an analyzer which removes words in STOP_WORDS.
+ */
+ public CJKAnalyzer() {
+ stopTable = StopFilter.makeStopSet(stopWords);
+ }
+
+ /**
+ * Builds an analyzer which removes words in the provided array.
+ *
+ * @param stopWords stop word array
+ */
+ public CJKAnalyzer(String[] stopWords) {
+ stopTable = StopFilter.makeStopSet(stopWords);
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * get token stream from input
+ *
+ * @param fieldName lucene field name
+ * @param reader input reader
+ * @return TokenStream
+ */
+ public final TokenStream tokenStream(String fieldName, Reader reader) {
+ return new StopFilter(new CJKTokenizer(reader), stopTable);
+ }
}
1.4 +2 -1
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
Index: CzechAnalyzer.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- CzechAnalyzer.java 11 Mar 2004 03:05:36 -0000 1.3
+++ CzechAnalyzer.java 12 Mar 2004 15:52:58 -0000 1.4
@@ -65,6 +65,7 @@
import java.io.*;
import java.util.Hashtable;
import java.util.HashSet;
+import java.util.Set;
/**
* Analyzer for Czech language. Supports an external list of stopwords (words that
@@ -103,7 +104,7 @@
/**
* Contains the stopwords used with the StopFilter.
*/
- private HashSet stoptable;
+ private Set stoptable;
/**
* Builds an analyzer.
1.5 +110 -107
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
Index: FrenchAnalyzer.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- FrenchAnalyzer.java 11 Mar 2004 03:05:36 -0000 1.4
+++ FrenchAnalyzer.java 12 Mar 2004 15:52:58 -0000 1.5
@@ -58,14 +58,15 @@
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+
import java.io.File;
import java.io.Reader;
-import java.util.Hashtable;
import java.util.HashSet;
-
-import org.apache.lucene.analysis.de.WordlistLoader;
+import java.util.Hashtable;
+import java.util.Set;
/**
* Analyzer for french language. Supports an external list of stopwords (words that
@@ -74,115 +75,117 @@
* A default set of stopwords is used unless an other list is specified, the
* exclusionlist is empty by default.
*
- * @author Patrick Talbot (based on Gerhard Schwarz work for German)
- * @version $Id$
+ * @author Patrick Talbot (based on Gerhard Schwarz work for German)
+ * @version $Id$
*/
public final class FrenchAnalyzer extends Analyzer {
- /**
- * Extended list of typical french stopwords.
- */
- private String[] FRENCH_STOP_WORDS = {
- "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd",
"auquel", "aussi",
- "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant",
"avec", "avoir",
- "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui",
"cependant", "certain",
- "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux",
"chez", "ci",
- "combien", "comme", "comment", "concernant", "contre", "d", "dans",
"de", "debout",
- "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais",
"desquelles",
- "desquels", "dessous", "dessus", "devant", "devers", "devra",
"divers", "diverse",
- "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès",
"elle", "elles",
- "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux",
"excepté", "hormis",
- "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque",
"l", "la", "laquelle",
- "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs",
"lorsque", "lui", "là",
- "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne",
"miennes", "miens", "moi",
- "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non",
"nos", "notre",
- "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre",
"où", "par", "parmi",
- "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs",
"pour", "pourquoi",
- "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle",
"quelles", "quels",
- "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans",
"sauf", "se", "selon",
- "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon",
"soi", "soit",
- "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien",
"tienne", "tiennes",
- "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un",
"une", "va", "vers",
- "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres",
"y", "à", "ça", "ès",
- "été", "être", "ô"
- };
-
- /**
- * Contains the stopwords used with the StopFilter.
- */
- private HashSet stoptable = new HashSet();
- /**
- * Contains words that should be indexed but not stemmed.
- */
- private HashSet excltable = new HashSet();
-
- /**
- * Builds an analyzer.
- */
- public FrenchAnalyzer() {
- stoptable = StopFilter.makeStopSet( FRENCH_STOP_WORDS );
- }
-
- /**
- * Builds an analyzer with the given stop words.
- */
- public FrenchAnalyzer( String[] stopwords ) {
- stoptable = StopFilter.makeStopSet( stopwords );
- }
+ /**
+ * Extended list of typical french stopwords.
+ */
+ private String[] FRENCH_STOP_WORDS = {
+ "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel",
"aussi",
+ "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec",
"avoir",
+ "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant",
"certain",
+ "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", "chez",
"ci",
+ "combien", "comme", "comment", "concernant", "contre", "d", "dans", "de",
"debout",
+ "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais",
"desquelles",
+ "desquels", "dessous", "dessus", "devant", "devers", "devra", "divers",
"diverse",
+ "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", "elle",
"elles",
+ "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", "excepté",
"hormis",
+ "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", "l", "la",
"laquelle",
+ "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "lorsque",
"lui", "là",
+ "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", "miennes",
"miens", "moi",
+ "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", "nos",
"notre",
+ "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", "où",
"par", "parmi",
+ "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", "pour",
"pourquoi",
+ "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", "quelles",
"quels",
+ "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", "sauf",
"se", "selon",
+ "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", "soi",
"soit",
+ "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", "tienne",
"tiennes",
+ "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", "une",
"va", "vers",
+ "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", "y", "à",
"ça", "ès",
+ "été", "être", "ô"
+ };
+
+ /**
+ * Contains the stopwords used with the StopFilter.
+ */
+ private Set stoptable = new HashSet();
+ /**
+ * Contains words that should be indexed but not stemmed.
+ */
+ private Set excltable = new HashSet();
+
+ /**
+ * Builds an analyzer.
+ */
+ public FrenchAnalyzer() {
+ stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ */
+ public FrenchAnalyzer(String[] stopwords) {
+ stoptable = StopFilter.makeStopSet(stopwords);
+ }
- /**
- * Builds an analyzer with the given stop words.
+ /**
+ * Builds an analyzer with the given stop words.
*
* @deprecated
- */
- public FrenchAnalyzer( Hashtable stopwords ) {
- stoptable = new HashSet(stopwords.keySet());
- }
-
- /**
- * Builds an analyzer with the given stop words.
- */
- public FrenchAnalyzer( File stopwords ) {
- stoptable = new HashSet(WordlistLoader.getWordtable( stopwords
).keySet());
- }
-
- /**
- * Builds an exclusionlist from an array of Strings.
- */
- public void setStemExclusionTable( String[] exclusionlist ) {
- excltable = StopFilter.makeStopSet( exclusionlist );
- }
- /**
- * Builds an exclusionlist from a Hashtable.
- */
- public void setStemExclusionTable( Hashtable exclusionlist ) {
- excltable = new HashSet(exclusionlist.keySet());
- }
- /**
- * Builds an exclusionlist from the words contained in the given file.
- */
- public void setStemExclusionTable( File exclusionlist ) {
- excltable = new HashSet(WordlistLoader.getWordtable( exclusionlist
).keySet());
- }
-
- /**
- * Creates a TokenStream which tokenizes all the text in the provided Reader.
- *
- * @return A TokenStream build from a StandardTokenizer filtered with
- * StandardFilter, StopFilter, FrenchStemFilter and
LowerCaseFilter
- */
- public final TokenStream tokenStream( String fieldName, Reader reader ) {
-
- if (fieldName==null) throw new IllegalArgumentException("fieldName
must not be null");
- if (reader==null) throw new IllegalArgumentException("readermust not
be null");
-
- TokenStream result = new StandardTokenizer( reader );
- result = new StandardFilter( result );
- result = new StopFilter( result, stoptable );
- result = new FrenchStemFilter( result, excltable );
- // Convert to lowercase after stemming!
- result = new LowerCaseFilter( result );
- return result;
- }
+ */
+ public FrenchAnalyzer(Hashtable stopwords) {
+ stoptable = new HashSet(stopwords.keySet());
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ */
+ public FrenchAnalyzer(File stopwords) {
+ stoptable = new HashSet(WordlistLoader.getWordtable(stopwords).keySet());
+ }
+
+ /**
+ * Builds an exclusionlist from an array of Strings.
+ */
+ public void setStemExclusionTable(String[] exclusionlist) {
+ excltable = StopFilter.makeStopSet(exclusionlist);
+ }
+
+ /**
+ * Builds an exclusionlist from a Hashtable.
+ */
+ public void setStemExclusionTable(Hashtable exclusionlist) {
+ excltable = new HashSet(exclusionlist.keySet());
+ }
+
+ /**
+ * Builds an exclusionlist from the words contained in the given file.
+ */
+ public void setStemExclusionTable(File exclusionlist) {
+ excltable = new HashSet(WordlistLoader.getWordtable(exclusionlist).keySet());
+ }
+
+ /**
+ * Creates a TokenStream which tokenizes all the text in the provided Reader.
+ *
+ * @return A TokenStream build from a StandardTokenizer filtered with
+ * StandardFilter, StopFilter, FrenchStemFilter and LowerCaseFilter
+ */
+ public final TokenStream tokenStream(String fieldName, Reader reader) {
+
+ if (fieldName == null) throw new IllegalArgumentException("fieldName must not
be null");
+ if (reader == null) throw new IllegalArgumentException("readermust not be
null");
+
+ TokenStream result = new StandardTokenizer(reader);
+ result = new StandardFilter(result);
+ result = new StopFilter(result, stoptable);
+ result = new FrenchStemFilter(result, excltable);
+ // Convert to lowercase after stemming!
+ result = new LowerCaseFilter(result);
+ return result;
+ }
}
1.4 +3 -2
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
Index: FrenchStemFilter.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- FrenchStemFilter.java 11 Mar 2004 03:05:36 -0000 1.3
+++ FrenchStemFilter.java 12 Mar 2004 15:52:59 -0000 1.4
@@ -60,6 +60,7 @@
import java.io.IOException;
import java.util.Hashtable;
import java.util.HashSet;
+import java.util.Set;
/**
* A filter that stemms french words. It supports a table of words that should
@@ -75,7 +76,7 @@
*/
private Token token = null;
private FrenchStemmer stemmer = null;
- private HashSet exclusions = null;
+ private Set exclusions = null;
public FrenchStemFilter( TokenStream in ) {
super(in);
@@ -92,7 +93,7 @@
exclusions = new HashSet(exclusiontable.keySet());
}
- public FrenchStemFilter( TokenStream in, HashSet exclusiontable ) {
+ public FrenchStemFilter( TokenStream in, Set exclusiontable ) {
this( in );
exclusions = exclusiontable;
}
1.3 +5 -3
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
Index: DutchAnalyzer.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- DutchAnalyzer.java 11 Mar 2004 03:05:36 -0000 1.2
+++ DutchAnalyzer.java 12 Mar 2004 15:52:59 -0000 1.3
@@ -26,6 +26,8 @@
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Set;
+import java.util.Map;
/**
* @author Edwin de Jonge
@@ -61,14 +63,14 @@
/**
* Contains the stopwords used with the StopFilter.
*/
- private HashSet stoptable = new HashSet();
+ private Set stoptable = new HashSet();
/**
* Contains words that should be indexed but not stemmed.
*/
- private HashSet excltable = new HashSet();
+ private Set excltable = new HashSet();
- private HashMap _stemdict = new HashMap();
+ private Map _stemdict = new HashMap();
/**
1.3 +5 -3
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
Index: DutchStemFilter.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- DutchStemFilter.java 11 Mar 2004 03:05:36 -0000 1.2
+++ DutchStemFilter.java 12 Mar 2004 15:52:59 -0000 1.3
@@ -23,6 +23,8 @@
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Set;
+import java.util.Map;
/**
* @author Edwin de Jonge
@@ -37,7 +39,7 @@
*/
private Token token = null;
private DutchStemmer stemmer = null;
- private HashSet exclusions = null;
+ private Set exclusions = null;
public DutchStemFilter(TokenStream _in) {
super(_in);
@@ -47,7 +49,7 @@
/**
* Builds a DutchStemFilter that uses an exclusiontable.
*/
- public DutchStemFilter(TokenStream _in, HashSet exclusiontable) {
+ public DutchStemFilter(TokenStream _in, Set exclusiontable) {
this(_in);
exclusions = exclusiontable;
}
@@ -55,7 +57,7 @@
/**
* @param stemdictionary Dictionary of word stem pairs, that overrule the
algorithm
*/
- public DutchStemFilter(TokenStream _in, HashSet exclusiontable, HashMap
stemdictionary) {
+ public DutchStemFilter(TokenStream _in, Set exclusiontable, Map stemdictionary) {
this(_in, exclusiontable);
stemmer.setStemDictionary(stemdictionary);
}
1.3 +3 -3
jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java
Index: DutchStemmer.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- DutchStemmer.java 11 Mar 2004 03:05:36 -0000 1.2
+++ DutchStemmer.java 12 Mar 2004 15:52:59 -0000 1.3
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-import java.util.HashMap;
+import java.util.Map;
/*
* @author Edwin de Jonge ([EMAIL PROTECTED])
@@ -32,7 +32,7 @@
*/
private StringBuffer sb = new StringBuffer();
private boolean _removedE;
- private HashMap _stemDict;
+ private Map _stemDict;
private int _R1;
private int _R2;
@@ -399,7 +399,7 @@
return false;
}
- void setStemDictionary(HashMap dict) {
+ void setStemDictionary(Map dict) {
_stemDict = dict;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]