Hi list,
I am not sure how to report bugs, or even whether anybody is interested in
bug reports. However, I have been playing with Lucene lately and found
an implementation bug in the FrenchStemmer
(/src/contrib/Analyzers/Fr/FrenchStemmer.cs). Whenever I tried to add a
new document to an index, I got an index-out-of-range error. So I looked
at the code and fixed that issue: see my diff file attached.
Please note that I also changed a few funky (accented) characters to Unicode
escape notation. The code worked well with the literal characters, but I
think it just looks better with the \uXXXX escapes...
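Anyway, the important part is the replacement of a couple of sb.Insert
calls with sb.Append. In both places sb.Length has already been shortened
by the length of the matched suffix, so subtracting that length a second
time to compute the insert index can point before the start of the buffer,
which is what raised the out-of-range error.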
I hope this helps.
Cheers,
Sylvain
@@ -179,9 +179,9 @@
DeleteFromIfTestVowelBeforeIn( R1, new String[] { "issements",
"issement" }, false, R0 );
DeleteFrom( RV, new String[] { "ements", "ement" } );
- DeleteButSuffixFromElseReplace( R2, new String[] { "ités",
"ité" }, "abil", false, R0, "abl" );
- DeleteButSuffixFromElseReplace( R2, new String[] { "ités",
"ité" }, "ic", false, R0, "iqU" );
- DeleteButSuffixFrom( R2, new String[] { "ités", "ité" },
"iv", true );
+ DeleteButSuffixFromElseReplace( R2, new String[] { "it\u00e9s",
"it\u00e9" }, "abil", false, R0, "abl" );
+ DeleteButSuffixFromElseReplace( R2, new String[] { "it\u00e9s",
"it\u00e9" }, "ic", false, R0, "iqU" );
+ DeleteButSuffixFrom( R2, new String[] { "it\u00e9s", "it\u00e9"
}, "iv", true );
String[] autre = { "ifs", "ives", "if", "ive" };
DeleteButSuffixFromElseReplace( R2, autre, "icat", false, R0,
"iqU" );
@@ -218,11 +218,11 @@
* @return bool - true if something changed in the StringBuilder
*/
private bool Step2A() {
- String[] search = { "îmes", "îtes", "iraIent", "irait",
"irais", "irai", "iras", "ira",
+ String[] search = { "\u00eemes", "\u00eetes", "iraIent",
"irait", "irais", "irai", "iras", "ira",
"irent", "iriez",
"irez", "irions", "irons", "iront",
"issaIent", "issais",
"issantes", "issante", "issants", "issant",
"issait", "issais",
"issions", "issons", "issiez", "issez", "issent",
- "isses", "isse", "ir",
"is", "ît", "it", "ies", "ie", "i" };
+ "isses", "isse", "ir",
"is", "\u00eet", "it", "ies", "ie", "i" };
return DeleteFromIfTestVowelBeforeIn( RV, search, false, RV );
}
@@ -233,13 +233,13 @@
*/
private void Step2B() {
String[] suffix = { "eraIent", "erais", "erait", "erai",
"eras", "erions", "eriez",
- "erons",
"eront","erez", "èrent", "era", "ées", "iez",
- "ée", "és", "er",
"ez", "é" };
+ "erons",
"eront","erez", "\u00e8rent", "era", "\u00e9es", "iez",
+ "\u00e9e", "\u00e9s",
"er", "ez", "\u00e9" };
DeleteFrom( RV, suffix );
String[] search = { "assions", "assiez", "assent", "asses",
"asse", "aIent",
- "antes", "aIent",
"Aient", "ante", "âmes", "âtes", "ants", "ant",
- "ait", "aît", "ais",
"Ait", "Aît", "Ais", "ât", "as", "ai", "Ai", "a" };
+ "antes", "aIent",
"Aient", "ante", "\u00e2mes", "\u00e2tes", "ants", "ant",
+ "ait", "a\u00eet",
"ais", "Ait", "A\u00eet", "Ais", "\u00e2t", "as", "ai", "Ai", "a" };
DeleteButSuffixFrom( RV, search, "e", true );
DeleteFrom( R2, new String[] { "ions" } );
@@ -288,9 +288,9 @@
if (!found)
found = DeleteFromIfPrecededIn( R2, new String[] { "ion" }, RV,
"t" );
- ReplaceFrom( RV, new String[] { "Ière", "ière", "Ier",
"ier" }, "i" );
+ ReplaceFrom( RV, new String[] { "I\u00e8re", "i\u00e8re",
"Ier", "ier" }, "i" );
DeleteFrom( RV, new String[] { "e" } );
- DeleteFromIfPrecededIn( RV, new String[] { "ë" }, R0, "gu" );
+ DeleteFromIfPrecededIn( RV, new String[] { "\u00eb" }, R0, "gu"
);
}
/**
@@ -463,7 +463,8 @@
else if ( from!=null && from.EndsWith( prefix +
search[i] ))
{
sb.Length = sb.Length - (prefix.Length +
search[i].Length);
- sb.Insert(sb.Length - (prefix.Length +
search[i].Length), replace );
+ sb.Append(replace);
+ //sb.Insert(sb.Length - (prefix.Length
+ search[i].Length), replace );
modified = true;
SetStrings();
break;
@@ -494,7 +495,8 @@
if ( source.EndsWith( search[i] ))
{
sb.Length = sb.Length - search[i].Length;
- sb.Insert(sb.Length - search[i].Length,
replace ); // replacement for java's replace
+ sb.Append(replace);
+ //sb.Insert(sb.Length - search[i].Length,
replace ); // replacement for java's replace
modified = true;
found = true;
SetStrings();