Hello, I finally got my managed collation engine working, though it is far from the complete form I am aiming for and is mostly conceptual for now (it does not handle many things, and it performs badly). For now it handles ASCII case sensitivity, a large part of the CompareOptions flags, and a large part of diacritical mark processing.
Here are the steps to make it available: 1. Apply the attached patch against mcs/class/corlib. 2. Go to mcs/class/corlib/Mono.Globalization.Unicode. 3. Run "make". It will automatically download some files from some sites. For now, the build breaks without this step. 4. Build corlib as usual. 5. Set the MONO_USE_MANAGED_COLLATION environment variable to "yes". Here is a serious problem: step 3 generates a 1.2MB C# source file that results in a 500KB increase in the size of mscorlib.dll. It could instead be generated as a C header, i.e. runtime source, like the existing culture-info-table.h, but it would still be huge. Moreover, about 200KB of the data is only for CJK cultures, so it won't be used unless we use those cultures to handle culture-sensitive CJK collation. That is mostly a waste of memory. One possible solution is to create a separate assembly and load the tables like this: - CompareInfo (or whatever holds those tables) keeps them in static variables. - If the variable is null, it tries to load the "internally stored table" via a runtime icall_1. At this stage, however, that returns null, since nothing is stored yet. - Then CompareInfo (or whatever) loads the "table-only assembly" via reflection, loads the table into memory, and invokes an icall_2 that sets the table as the runtime's internal table. - The next time CompareInfo tries to fill the table, icall_1 will return the table. In fact, the same discussion also applies to the string normalization tables (to support String.Normalize(), introduced in .NET 2.0). Any good ideas for this problem? Thanks, Atsushi Eno
Index: corlib.dll.sources =================================================================== --- corlib.dll.sources (revision 46284) +++ corlib.dll.sources (working copy) @@ -8,6 +8,12 @@ Microsoft.Win32/Win32RegistryApi.cs Microsoft.Win32/Win32ResultCode.cs Microsoft.Win32.SafeHandles/SafeFileHandle.cs +Mono.Globalization.Unicode/CodePointIndexer.cs +Mono.Globalization.Unicode/MSCompatUnicodeTable.cs +Mono.Globalization.Unicode/MSCompatUnicodeTableUtil.cs +Mono.Globalization.Unicode/SimpleCollator.cs +Mono.Globalization.Unicode/SortKey.cs +Mono.Globalization.Unicode/SortKeyBuffer.cs Mono/Runtime.cs Mono.Math/BigInteger.cs Mono.Math.Prime/ConfidenceFactor.cs @@ -300,7 +306,6 @@ System.Globalization/NumberFormatInfo.cs System.Globalization/NumberStyles.cs System.Globalization/RegionInfo.cs -System.Globalization/SortKey.cs System.Globalization/StringInfo.cs System.Globalization/TaiwanCalendar.cs System.Globalization/TextElementEnumerator.cs Index: System.Globalization/CompareInfo.cs =================================================================== --- System.Globalization/CompareInfo.cs (revision 46284) +++ System.Globalization/CompareInfo.cs (working copy) @@ -34,12 +34,17 @@ using System.Reflection; using System.Runtime.Serialization; using System.Runtime.CompilerServices; +using Mono.Globalization.Unicode; namespace System.Globalization { [Serializable] public class CompareInfo : IDeserializationCallback { + public static readonly bool UseManagedCollation = + Environment.GetEnvironmentVariable ("MONO_USE_MANAGED_COLLATION") + == "yes"; + // Keep in synch with MonoCompareInfo in the runtime. 
private int culture; [NonSerialized] @@ -47,6 +52,8 @@ [NonSerialized] private IntPtr ICU_collator; private int win32LCID; // Unused, but MS.NET serializes this + + SimpleCollator collator; /* Hide the .ctor() */ CompareInfo() {} @@ -57,25 +64,50 @@ internal CompareInfo (CultureInfo ci) { this.culture = ci.LCID; - this.icu_name = ci.IcuName; - this.construct_compareinfo (icu_name); + if (UseManagedCollation) + collator = new SimpleCollator (ci); + else { + this.icu_name = ci.IcuName; + this.construct_compareinfo (icu_name); + } } [MethodImplAttribute (MethodImplOptions.InternalCall)] private extern void free_internal_collator (); - + ~CompareInfo () { - free_internal_collator (); + if (!UseManagedCollation) + free_internal_collator (); } - - + [MethodImplAttribute (MethodImplOptions.InternalCall)] private extern int internal_compare (string str1, int offset1, int length1, string str2, int offset2, int length2, CompareOptions options); + private int internal_compare_managed (string str1, int offset1, + int length1, string str2, + int offset2, int length2, + CompareOptions options) + { + return collator.Compare (str1, offset1, length1, + str2, offset2, length2, options); + } + + private int internal_compare_switch (string str1, int offset1, + int length1, string str2, + int offset2, int length2, + CompareOptions options) + { + return UseManagedCollation ? + internal_compare_managed (str1, offset1, length1, + str2, offset2, length2, options) : + internal_compare (str1, offset1, length1, + str2, offset2, length2, options); + } + public virtual int Compare (string string1, string string2) { /* Short cuts... 
*/ @@ -89,7 +121,7 @@ return(1); } - return(internal_compare (string1, 0, string1.Length, + return(internal_compare_switch (string1, 0, string1.Length, string2, 0, string2.Length, CompareOptions.None)); } @@ -108,7 +140,7 @@ return(1); } - return(internal_compare (string1, 0, string1.Length, + return(internal_compare_switch (string1, 0, string1.Length, string2, 0, string2.Length, options)); } @@ -146,7 +178,7 @@ throw new ArgumentOutOfRangeException ("Offset2 is greater than or equal to the length of string2"); } - return(internal_compare (string1, offset1, + return(internal_compare_switch (string1, offset1, string1.Length-offset1, string2, offset2, string2.Length-offset2, @@ -187,7 +219,7 @@ throw new ArgumentOutOfRangeException ("Offset2 is greater than or equal to the length of string2"); } - return(internal_compare (string1, offset1, + return(internal_compare_switch (string1, offset1, string1.Length-offset1, string2, offset2, string2.Length-offset1, @@ -240,7 +272,7 @@ throw new ArgumentOutOfRangeException ("Length2 is greater than the number of characters from offset2 to the end of string2"); } - return(internal_compare (string1, offset1, length1, + return(internal_compare_switch (string1, offset1, length1, string2, offset2, length2, CompareOptions.None)); } @@ -292,7 +324,7 @@ throw new ArgumentOutOfRangeException ("Length2 is greater than the number of characters from offset2 to the end of string2"); } - return(internal_compare (string1, offset1, length1, + return(internal_compare_switch (string1, offset1, length1, string2, offset2, length2, options)); } @@ -372,6 +404,8 @@ public virtual SortKey GetSortKey(string source, CompareOptions options) { + if (UseManagedCollation) + return collator.GetSortKey (source, options); SortKey key=new SortKey (culture, source, options); /* Need to do the icall here instead of in the @@ -460,7 +494,25 @@ int count, char value, CompareOptions options, bool first); - + + private int internal_index_managed (string s, int 
sindex, + int count, char c, CompareOptions opt, + bool first) + { + return first ? + collator.IndexOf (s, c, sindex, count, opt) : + collator.LastIndexOf (s, c, sindex, count, opt); + } + + private int internal_index_switch (string s, int sindex, + int count, char c, CompareOptions opt, + bool first) + { + return UseManagedCollation ? + internal_index_managed (s, sindex, count, c, opt, first) : + internal_index (s, sindex, count, c, opt, first); + } + public virtual int IndexOf (string source, char value, int startIndex, int count, CompareOptions options) @@ -492,7 +544,7 @@ } return(-1); } else { - return (internal_index (source, startIndex, + return (internal_index_switch (source, startIndex, count, value, options, true)); } @@ -503,7 +555,25 @@ int count, string value, CompareOptions options, bool first); - + + private int internal_index_managed (string s1, int sindex, + int count, string s2, CompareOptions opt, + bool first) + { + return first ? + collator.IndexOf (s1, s2, sindex, count, opt) : + collator.LastIndexOf (s1, s2, sindex, count, opt); + } + + private int internal_index_switch (string s1, int sindex, + int count, string s2, CompareOptions opt, + bool first) + { + return UseManagedCollation ? 
+ internal_index_managed (s1, sindex, count, s2, opt, first) : + internal_index (s1, sindex, count, s2, opt, first); + } + public virtual int IndexOf (string source, string value, int startIndex, int count, CompareOptions options) @@ -524,7 +594,7 @@ return(-1); } - return (internal_index (source, startIndex, count, + return (internal_index_switch (source, startIndex, count, value, options, true)); } @@ -682,7 +752,7 @@ } return(-1); } else { - return (internal_index (source, startIndex, + return (internal_index_switch (source, startIndex, count, value, options, false)); } @@ -713,7 +783,7 @@ return(0); } - return(internal_index (source, startIndex, count, + return(internal_index_switch (source, startIndex, count, value, options, false)); } @@ -724,6 +794,9 @@ void IDeserializationCallback.OnDeserialization(object sender) { + if (UseManagedCollation) + return; // maybe nothing to do. + /* This will build the ICU collator, and store * the pointer in ICU_collator */
_______________________________________________ Mono-devel-list mailing list Mono-devel-list@lists.ximian.com http://lists.ximian.com/mailman/listinfo/mono-devel-list