Hi list,

I am not sure how to report bugs, or even whether anybody is interested in bug reports. However, I have been playing with Lucene lately and found an implementation bug in the FrenchStemmer (/src/contrib/Analyzers/Fr/FrenchStemmer.cs). Whenever I tried to add a new document to an index, I got an index-out-of-range error. So I looked at the code and fixed the issue: see the attached diff file.

Please note that I also changed a few non-ASCII (accented) characters in string literals to Unicode escape notation. The code worked well with the literal accented characters, but I think it just looks better with the \uXXXX escapes...

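Anyway, the important part is the replacement of a couple of sb.Insert calls with sb.Append. In both places sb.Length has already been shortened by the suffix length on the previous line, so the Insert index (sb.Length minus the suffix length again) subtracts the suffix twice and can fall outside the valid range; appending the replacement to the truncated builder is what was intended.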

I hope this helps.

Cheers,
Sylvain
@@ -179,9 +179,9 @@
                DeleteFromIfTestVowelBeforeIn( R1, new String[] { "issements", 
"issement" }, false, R0 );
                DeleteFrom( RV, new String[] { "ements", "ement" } );
 
-               DeleteButSuffixFromElseReplace( R2, new String[] { "ités", 
"ité" }, "abil", false, R0, "abl" );
-               DeleteButSuffixFromElseReplace( R2, new String[] { "ités", 
"ité" }, "ic", false, R0, "iqU" );
-               DeleteButSuffixFrom( R2, new String[] { "ités", "ité" }, 
"iv", true );
+               DeleteButSuffixFromElseReplace( R2, new String[] { "it\u00e9s", 
"it\u00e9" }, "abil", false, R0, "abl" );
+               DeleteButSuffixFromElseReplace( R2, new String[] { "it\u00e9s", 
"it\u00e9" }, "ic", false, R0, "iqU" );
+               DeleteButSuffixFrom( R2, new String[] { "it\u00e9s", "it\u00e9" 
}, "iv", true );
 
                String[] autre = { "ifs", "ives", "if", "ive" };
                DeleteButSuffixFromElseReplace( R2, autre, "icat", false, R0, 
"iqU" );
@@ -218,11 +218,11 @@
         * @return bool - true if something changed in the StringBuilder
         */
        private bool Step2A() {
-               String[] search = { "îmes", "îtes", "iraIent", "irait", 
"irais", "irai", "iras", "ira",
+               String[] search = { "\u00eemes", "\u00eetes", "iraIent", 
"irait", "irais", "irai", "iras", "ira",
                                                        "irent", "iriez", 
"irez", "irions", "irons", "iront",
                                                        "issaIent", "issais", 
"issantes", "issante", "issants", "issant",
                                                        "issait", "issais", 
"issions", "issons", "issiez", "issez", "issent",
-                                                       "isses", "isse", "ir", 
"is", "ît", "it", "ies", "ie", "i" };
+                                                       "isses", "isse", "ir", 
"is", "\u00eet", "it", "ies", "ie", "i" };
                return DeleteFromIfTestVowelBeforeIn( RV, search, false, RV );
        }
 
@@ -233,13 +233,13 @@
         */
        private void Step2B() {
                String[] suffix = { "eraIent", "erais", "erait", "erai", 
"eras", "erions", "eriez",
-                                                       "erons", 
"eront","erez", "èrent", "era", "ées", "iez",
-                                                       "ée", "és", "er", 
"ez", "é" };
+                                                       "erons", 
"eront","erez", "\u00e8rent", "era", "\u00e9es", "iez",
+                                                       "\u00e9e", "\u00e9s", 
"er", "ez", "\u00e9" };
                DeleteFrom( RV, suffix );
 
                String[] search = { "assions", "assiez", "assent", "asses", 
"asse", "aIent",
-                                                       "antes", "aIent", 
"Aient", "ante", "âmes", "âtes", "ants", "ant",
-                                                       "ait", "aît", "ais", 
"Ait", "Aît", "Ais", "ât", "as", "ai", "Ai", "a" };
+                                                       "antes", "aIent", 
"Aient", "ante", "\u00e2mes", "\u00e2tes", "ants", "ant",
+                                                       "ait", "a\u00eet", 
"ais", "Ait", "A\u00eet", "Ais", "\u00e2t", "as", "ai", "Ai", "a" };
                DeleteButSuffixFrom( RV, search, "e", true );
 
                DeleteFrom( R2, new String[] { "ions" } );
@@ -288,9 +288,9 @@
                if (!found)
                found = DeleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, 
"t" );
 
-               ReplaceFrom( RV, new String[] { "Ière", "ière", "Ier", 
"ier" }, "i" );
+               ReplaceFrom( RV, new String[] { "I\u00e8re", "i\u00e8re", 
"Ier", "ier" }, "i" );
                DeleteFrom( RV, new String[] { "e" } );
-               DeleteFromIfPrecededIn( RV, new String[] { "ë" }, R0, "gu" );
+               DeleteFromIfPrecededIn( RV, new String[] { "\u00eb" }, R0, "gu" 
);
        }
 
        /**
@@ -463,7 +463,8 @@
                                else if ( from!=null && from.EndsWith( prefix + 
search[i] ))
                                {
                                    sb.Length = sb.Length - (prefix.Length + 
search[i].Length);
-                                       sb.Insert(sb.Length - (prefix.Length + 
search[i].Length), replace );
+                    sb.Append(replace);
+                                       //sb.Insert(sb.Length - (prefix.Length 
+ search[i].Length), replace );
                                        modified = true;
                                        SetStrings();
                                        break;
@@ -494,7 +495,8 @@
                                if ( source.EndsWith( search[i] ))
                                {
                                    sb.Length = sb.Length - search[i].Length;
-                                   sb.Insert(sb.Length - search[i].Length, 
replace );  // replacement for java's replace
+                    sb.Append(replace);
+                                   //sb.Insert(sb.Length - search[i].Length, 
replace );  // replacement for java's replace
                                        modified = true;
                                        found = true;
                                        SetStrings();

Reply via email to