/**
 * Dutch analyzer - with a complete list of stopwords...
 *
 * @author    Maurits van Wijland
 */

package org.apache.lucene.analysis.nl;

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.nl.*;


import java.io.Reader;
import java.util.Hashtable;

/**
 * Analyzer for Dutch text. Filters {@link StandardTokenizer} with
 * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} and
 * finally {@link DutchStemFilter}, in that order.
 *
 * <p>The stop-word table is built once, in the constructor, and the same
 * table instance is handed to every {@link StopFilter} created by
 * {@link #tokenStream(String, Reader)}.
 */
public final class DutchAnalyzer extends Analyzer {
  /**
   * Stop-word lookup table produced by {@link StopFilter#makeStopTable};
   * assigned exactly once at construction and never replaced afterwards.
   */
  private final Hashtable stopTable;

  /**
   * An array containing common Dutch words that are not usually useful
   * for searching. Used as the stop-word list by the no-argument
   * constructor. All entries are lower case.
   */
  public static final String[] STOP_WORDS_NL = {
    "aan","acht","achter","af","al","aldus","algemene","alle",
    "alleen","allerlei","alles","als","altijd","ander","andere",
    "anderen","anders","behalve","beide","ben","bent","best",
    "beste","beter","bij","bijna","bijvoorbeeld","binnen",
    "boven","bovendien","daar","daarbij","daardoor","daarin",
    "daarmee","daarna","daarnaast","daarom","daarop","daartoe",
    "daarvan","daarvoor","dagelijks","dan","dat","de","den","der",
    "derde","dertig","des","deze","dezelfde","die","dit","doen",
    "door","doordat","drie","duidelijk","duizend","dus","echt",
    "echter","een","eens","eenvoudig","eerder","eerst","eerste",
    "eigen","eigenlijk","elk","elkaar","elke","en","end","ene",
    "enige","enigszins","enkele","enorme","er","erg","erop",
    "ervan","ervoor","even","evenwel","extra","flink","gaan",
    "ge","geen","genoeg","geven","geweest","gewoon","geworden",
    "goed","goede","graag","groot","grootste","grote","grotere",
    "haar","had","hadden","heb","hebben","hebt","heeft","heen",
    "heet","helaas","hele","hem","hen","het","hetgeen","hetzelfde",
    "hier","hierbij","hierdoor","hiermee","hiervoor","hij","hoe",
    "hoeven","hoewel","honderd","huidige","hun","ieder","iedere",
    "iedereen","iemand","iets","ik","immers","in","inmiddels","is",
    "jaar","je","juist","juiste","kan","klein","kleine","kleinere",
    "komen","komt","kon","konden","kort","korte","kun","kunnen",
    "kunt","laatste","lange","langs","later","liefst","maakt","maar",
    "me","mede","mee","meer","meerdere","meest","meeste","men",
    "met","meteen","midden","mij","mijn","minder","misschien","moet",
    "mogelijk","momenteel","na","naar","naast","nadat","namelijk",
    "nauwelijks","net","niet","niets","nieuw","nieuwe","nodig","nog",
    "nogal","nooit","nu","of","om","omdat","ondanks","onder","ongeveer",
    "onlangs","ons","onze","ooit","ook","op","open","opnieuw","opzichte",
    "over","overal","overigens","per","precies","pro","redelijk","reeds",
    "regelmatig","respectievelijk","sinds","slechts","snel","sommige",
    "soms","steeds","tal","te","tegelijk","tegelijkertijd","tegen",
    "tegenover","ten","ter","terecht","terug","terwijl","tevens",
    "tien","toch","toe","toen","tot","totaal","totale","tussen","twaalf",
    "twee","tweede","twintig","u","uit","uiteindelijk","uw","vaak","van",
    "vanaf","vanuit","vanwege","vast","veel","veertig","vele","ver",
    "verder","verschillende","via","vier","vierde","vijf","vijftien",
    "vijftig","voldoende","volgend","volgende","volgens","volop",
    "voor","vooral","vooralsnog","voordat","voorlopig","voortdurend",
    "vorig","vrijwel","waar","waarbij","waardoor","waarin","waarmee",
    "waarom","waaronder","waarop","waarschijnlijk","waarvan","waarvoor",
    "wanneer","want","was","wat","we","weer","weet","weinig","wel",
    "welk","welke","wellicht","werd","werden","werken","wie","wij",
    "wil","wilde","worden","wordt","zal","ze","zeer","zegt","zei",
    "zeker","zelf","zelfs","zes","zetten","zich","zichzelf","zien",
    "zij","zijn","zo","zoals","zodat","zodra","zolang","zonder","zou",
    "zouden","zoveel","zowel","zullen"
  };

  /** Builds an analyzer that uses the default stop words, {@link #STOP_WORDS_NL}. */
  public DutchAnalyzer() {
    this(STOP_WORDS_NL);
  }

  /**
   * Builds an analyzer with a caller-supplied stop-word list.
   *
   * @param stopWords the words to remove from the token stream; passed
   *        straight to {@link StopFilter#makeStopTable}. Because tokens are
   *        lower-cased before the stop filter runs, the entries presumably
   *        need to be lower case to match -- confirm against
   *        {@link StopFilter}.
   */
  public DutchAnalyzer(String[] stopWords) {
    stopTable = StopFilter.makeStopTable(stopWords);
  }


  /**
   * Creates the filter chain for one piece of text.
   *
   * <p>The chain is: {@link StandardTokenizer} over {@code reader}, then
   * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   * (using this analyzer's stop table) and {@link DutchStemFilter}.
   * Stop-word removal happens before stemming, so the stemmer only sees
   * tokens that survived the stop filter.
   *
   * @param fieldName name of the field being analyzed; not used by this
   *        implementation, the same chain is built for every field
   * @param reader source of the text to tokenize
   * @return the outermost filter of the chain
   */
  public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, stopTable);
    result = new DutchStemFilter(result);
    return result;
  }
}