Hi,
I'm using Lucene.Net together with Snowball stemming to index text from a
database. The class Lucene.Net.Analysis.Snowball.SnowballFilter uses the
reflection API (MethodInfo.Invoke) to call the Stem method of the Snowball
stemmer for every token. I have written a Snowball filter that creates a
delegate for that method once and then uses the delegate to stem the words.
This approach improves the indexing speed of my indexing program by about 10%.
I would be happy if you included this code in Lucene.Net.
With kind regards,
Arian
Code:
using System;
using Lucene.Net.Analysis;
using SF.Snowball;
using SF.Snowball.Ext;
namespace Index.Search.Analyzers
{
/// <summary>
/// A filter that stems words using a Snowball-generated stemmer.
///
/// Available stemmers are listed in <c>SF.Snowball.Ext</c>. The name of a
/// stemmer is the part of the class name before "Stemmer", e.g., the stemmer
/// in <c>EnglishStemmer</c> is named "English".
///
/// The stemmer's <c>Stem</c> method is bound to a delegate once, in the
/// constructor, so that no reflection-based <c>Invoke</c> call is needed per
/// token. If stemming a token throws, the failure is written to the console
/// and the token is passed through with its original, unstemmed text.
/// </summary>
public class FailOverSnowballFilter : TokenFilter
{
    /// <summary>Signature of the parameterless, bool-returning <c>Stem</c> method.</summary>
    private delegate bool BoolVoidDelegate();

    /// <summary>Class name of the stemmer (language name + "Stemmer"); used for lookup and log output.</summary>
    private readonly string stemmerName = string.Empty;

    /// <summary>Delegate bound to <c>stemmer.Stem()</c>.</summary>
    private readonly BoolVoidDelegate stemDelegate;

    /// <summary>The Snowball stemmer instance that holds the current word buffer.</summary>
    private readonly SnowballProgram stemmer;

    /// <summary>Construct a stemmer for a certain language.</summary>
    /// <param name="in_Renamed">the input tokens to stem</param>
    /// <param name="name">the language name of a stemmer</param>
    /// <exception cref="System.SystemException">
    /// The stemmer class could not be instantiated, or its <c>Stem</c> method
    /// could not be found or bound. The original failure is available as the
    /// inner exception.
    /// </exception>
    public FailOverSnowballFilter(TokenStream in_Renamed,
        System.String name)
        : base(in_Renamed)
    {
        stemmerName = name + "Stemmer";
        try
        {
            stemmer = (SnowballProgram)Activator.CreateInstance(
                "Snowball.Net", "SF.Snowball.Ext." + stemmerName).Unwrap();

            System.Reflection.MethodInfo stemMethod =
                stemmer.GetType().GetMethod("Stem", Type.EmptyTypes);
            if (stemMethod == null)
            {
                // Fail with a meaningful error instead of the ArgumentNullException
                // that Delegate.CreateDelegate would raise for a null method.
                throw new System.MissingMethodException(stemmer.GetType().FullName, "Stem");
            }

            stemDelegate = (BoolVoidDelegate)Delegate.CreateDelegate(
                typeof(BoolVoidDelegate), stemmer, stemMethod);
        }
        catch (System.Exception e)
        {
            // Same exception type and message as before, but keep the cause attached.
            throw new System.SystemException(e.ToString(), e);
        }
    }

    /// <summary>Returns the next input Token, after being stemmed.</summary>
    /// <returns>
    /// A new token carrying the stemmed text and the offsets, type and position
    /// increment of the input token; <c>null</c> when the input is exhausted.
    /// </returns>
    public override Token Next()
    {
        Token token = input.Next();
        if (token == null)
        {
            return null;
        }

        string originalText = token.TermText();
        string stemmedText;

        stemmer.SetCurrent(originalText);
        try
        {
            stemDelegate();
            stemmedText = stemmer.GetCurrent();
        }
        catch (System.Exception e)
        {
            Console.WriteLine(string.Format("{0} was not able to stemm token \"{1}\", using token directly.\n {2}", stemmerName,
                originalText, e.ToString()));

            // Fail over to the unstemmed text: after an exception the stemmer's
            // buffer may contain a partially transformed word.
            stemmedText = originalText;
        }

        Token newToken = new Token(stemmedText,
            token.StartOffset(), token.EndOffset(), token.Type());
        newToken.SetPositionIncrement(token.GetPositionIncrement());
        return newToken;
    }
}
}
---------------------------------------------------------------------------
An- und Abmeldung zur SCHEMA Mailingliste unter http://www.schema.de/mail
---------------------------------------------------------------------------