Author: atsushi
Date: 2005-06-21 02:22:58 -0400 (Tue, 21 Jun 2005)
New Revision: 46277
Modified:
branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs
branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/TestDriver.cs
Log:
2005-06-21 Atsushi Enomoto <[EMAIL PROTECTED]>
* SimpleCollator.cs :
Implemented IsSuffix() and LastIndexOf().
Several fixes for cases where index > 0.
* TestDriver.cs : added sample IsSuffix() and LastIndexOf() usage, and more.
Modified: branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
===================================================================
--- branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
2005-06-21 05:03:52 UTC (rev 46276)
+++ branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
2005-06-21 06:22:58 UTC (rev 46277)
@@ -1,5 +1,12 @@
2005-06-21 Atsushi Enomoto <[EMAIL PROTECTED]>
+ * SimpleCollator.cs :
+ Implemented IsSuffix() and LastIndexOf().
+ Several fixes on index > 0 cases.
+ * TestDriver.cs : sample IsSuffix() and LastIndexOf() usage and more.
+
+2005-06-21 Atsushi Enomoto <[EMAIL PROTECTED]>
+
* Collation-notes.txt : updated (status, impl. classes).
* MSCompatUnicodeTable.cs : Korean Jamo are not really expansions.
Modified:
branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs
===================================================================
---
branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs
2005-06-21 05:03:52 UTC (rev 46276)
+++
branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs
2005-06-21 06:22:58 UTC (rev 46277)
@@ -67,7 +67,7 @@
return GetSortKey (s, 0, s.Length, options);
}
- SortKey GetSortKey (string s, int start, int length,
CompareOptions options)
+ public SortKey GetSortKey (string s, int start, int length,
CompareOptions options)
{
SetOptions (options);
@@ -273,6 +273,78 @@
#endregion
+ #region IsSuffix()
+
+ public bool IsSuffix (string src, string target, CompareOptions
opt)
+ {
+ return IsSuffix (src, target, 0, src.Length, opt);
+ }
+
+ // It is mostly copy of IsPrefix().
+ public bool IsSuffix (string s, string target, int start, int
length, CompareOptions opt)
+ {
+ SetOptions (opt);
+
+ int min = length > target.Length ? target.Length :
length;
+ int si = start + length - 1;
+
+ // FIXME: this is not enough to handle tailorings.
+ for (int j = min - 1; j >= 0; j--, si--) {
+ int ci = FilterOptions (s [si]);
+ int cj = FilterOptions (target [j]);
+ if (ci == cj)
+ continue;
+ if (IsIgnorable (s [si])) {
+ if (!IsIgnorable (target [j]))
+ j++;
+ continue;
+ }
+ else if (IsIgnorable (target [j])) {
+ si++;
+ continue;
+ }
+
+ // FIXME: should handle expansions (and it
+ // should be before codepoint comparison).
+ string expansion = GetExpansion (s [si]);
+ if (expansion != null)
+ return false;
+ expansion = GetExpansion (target [j]);
+ if (expansion != null)
+ return false;
+
+ if (Uni.Categories (ci) != Uni.Categories (cj)
||
+ Uni.Level1 (ci) != Uni.Level1 (cj) ||
+ !ignoreNonSpace && Uni.Level2 (ci) !=
Uni.Level2 (cj) ||
+ Uni.Level3 (ci) != Uni.Level3 (cj))
+ return false;
+ if (!Uni.HasSpecialWeight ((char) ci))
+ continue;
+ if (Uni.IsJapaneseSmallLetter ((char) ci) !=
+ Uni.IsJapaneseSmallLetter ((char) cj) ||
+ Uni.GetJapaneseDashType ((char) ci) !=
+ Uni.GetJapaneseDashType ((char) cj) ||
+ !Uni.IsHiragana ((char) ci) !=
+ !Uni.IsHiragana ((char) cj) ||
+ Uni.IsHalfWidthKana ((char) ci) !=
+ Uni.IsHalfWidthKana ((char) cj))
+ return false;
+ }
+ if (si == min) {
+ // All codepoints in the compared range
+ // matches. In that case, what matters
+ // is whether the remaining part of
+ // "target" is ignorable or not.
+ for (int i = target.Length - min - 1; i >= 0;
i--)
+ if (!IsIgnorable (target [i]))
+ return false;
+ return true;
+ }
+ return true;
+ }
+
+ #endregion
+
#region IndexOf()
public int IndexOf (string s, char target)
@@ -343,10 +415,9 @@
// expansions
int idx = IndexOf (s, target [0], start,
length, opt);
if (idx < 0)
+ return -1;
+ if (IsPrefix (s, target, idx, length - (idx -
start), opt))
return idx;
-
- if (IsPrefix (s, target, start + idx, length -
idx, opt))
- return idx;
start++;
length--;
} while (length > 0);
@@ -354,5 +425,87 @@
}
#endregion
+
+ #region LastIndexOf()
+
+ public int LastIndexOf (string s, char target)
+ {
+ return LastIndexOf (s, target, 0, s.Length,
CompareOptions.None);
+ }
+
+ public int LastIndexOf (string s, char target, CompareOptions
opt)
+ {
+ return LastIndexOf (s, target, 0, s.Length, opt);
+ }
+
+ public int LastIndexOf (string s, char target, int start, int
length, CompareOptions opt)
+ {
+ // If target has an expansion, then use string search.
+ string expansion = GetExpansion (target);
+ if (expansion != null)
+ return LastIndexOf (s, expansion, start,
length, opt);
+
+ SetOptions (opt);
+
+ int ti = FilterOptions ((int) target);
+ for (int idx = start + length - 1; idx >= start; idx--)
{
+ switch (char.GetUnicodeCategory (s [idx])) {
+ case UnicodeCategory.PrivateUse:
+ case UnicodeCategory.Surrogate:
+ if (s [idx] != target)
+ continue;
+ return idx;
+ }
+
+ expansion = GetExpansion (s [idx]);
+ if (expansion != null)
+ continue; // since target cannot be
expansion as conditioned above.
+ if (s [idx] == target)
+ return idx;
+ int si = FilterOptions ((int) s [idx]);
+ if (Uni.Categories (si) != Uni.Categories (ti)
||
+ Uni.Level1 (si) != Uni.Level1 (ti) ||
+ !ignoreNonSpace && Uni.Level2 (si) !=
Uni.Level2 (ti) ||
+ Uni.Level3 (si) != Uni.Level3 (ti))
+ continue;
+ if (!Uni.HasSpecialWeight ((char) si))
+ return idx;
+ if (Uni.IsJapaneseSmallLetter ((char) si) !=
+ Uni.IsJapaneseSmallLetter ((char) ti) ||
+ Uni.GetJapaneseDashType ((char) si) !=
+ Uni.GetJapaneseDashType ((char) ti) ||
+ !Uni.IsHiragana ((char) si) !=
+ !Uni.IsHiragana ((char) ti) ||
+ Uni.IsHalfWidthKana ((char) si) !=
+ Uni.IsHalfWidthKana ((char) ti))
+ continue;
+ }
+ return -1;
+ }
+
+ public int LastIndexOf (string s, string target, CompareOptions
opt)
+ {
+ return LastIndexOf (s, target, 0, s.Length, opt);
+ }
+
+ public int LastIndexOf (string s, string target, int start, int
length, CompareOptions opt)
+ {
+ SetOptions (opt);
+
+ do {
+ // FIXME: this should be modified to handle
+ // expansions
+ int idx = LastIndexOf (s, target [0], start,
length, opt);
+ if (idx < 0)
+ return -1;
+
+ if (IsPrefix (s, target, idx, length - (idx -
start), opt))
+ return idx;
+ length = idx - start - 1;
+ } while (length > 0);
+ return -1;
+ }
+
+ #endregion
}
}
Modified:
branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/TestDriver.cs
===================================================================
--- branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/TestDriver.cs
2005-06-21 05:03:52 UTC (rev 46276)
+++ branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/TestDriver.cs
2005-06-21 06:22:58 UTC (rev 46277)
@@ -20,6 +20,10 @@
void Run ()
{
+ DumpSortKey ("AE");
+ DumpSortKey ("\u00C6");
+ DumpSortKey ("ABCABC", 5, 1, CompareOptions.IgnoreCase);
+
Compare ("1", "2");
Compare ("A", "a");
Compare ("A", "a", CompareOptions.IgnoreCase);
@@ -29,24 +33,42 @@
Compare ("AE", "\u00C6");
Compare ("AB\u01c0C", "A\u01c0B\u01c0C",
CompareOptions.IgnoreSymbols);
Compare ("A\u0304", "\u0100"); // diacritical weight
addition
+ Compare ("ABCABC", 5, 1, "c", 0, 1,
CompareOptions.IgnoreCase);
IndexOf ("ABC", '1', CompareOptions.None);
- IndexOf ("ABC", 'c', CompareOptions.IgnoreCase);
- IndexOf ("ABC", '\uFF22', CompareOptions.IgnoreCase |
CompareOptions.IgnoreWidth);
+ IndexOf ("ABCABC", 'c', CompareOptions.IgnoreCase);
+ IndexOf ("ABCABC", '\uFF22', CompareOptions.IgnoreCase
| CompareOptions.IgnoreWidth);
IndexOf ("ABCDE", '\u0117',
CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase);
+ IndexOf ("ABCABC", 'B', 1, 5,
CompareOptions.IgnoreCase);
+ LastIndexOf ("ABC", '1', CompareOptions.None);
+ LastIndexOf ("ABCABC", 'c', CompareOptions.IgnoreCase);
+ LastIndexOf ("ABCABC", '\uFF22',
CompareOptions.IgnoreCase | CompareOptions.IgnoreWidth);
+ LastIndexOf ("ABCDE", '\u0117',
CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase);
+
IsPrefix ("ABC", "c", CompareOptions.IgnoreCase);
IsPrefix ("BC", "c", CompareOptions.IgnoreCase);
IsPrefix ("C", "c", CompareOptions.IgnoreCase);
- IsPrefix ("E", "\u0117", CompareOptions.IgnoreNonSpace
| CompareOptions.IgnoreCase);
+ IsPrefix ("EDCBA", "\u0117",
CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase);
+ IsSuffix ("ABC", "c", CompareOptions.IgnoreCase);
+ IsSuffix ("BC", "c", CompareOptions.IgnoreCase);
+ IsSuffix ("CBA", "c", CompareOptions.IgnoreCase);
+ IsSuffix ("ABCDE", "\u0117",
CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase);
+
IndexOf ("ABC", "1", CompareOptions.None);
- IndexOf ("ABC", "c", CompareOptions.IgnoreCase);
- IndexOf ("ABC", "\uFF22", CompareOptions.IgnoreCase |
CompareOptions.IgnoreWidth);
+ IndexOf ("ABCABC", "c", CompareOptions.IgnoreCase);
+ IndexOf ("ABCABC", "\uFF22", CompareOptions.IgnoreCase
| CompareOptions.IgnoreWidth);
IndexOf ("ABCDE", "\u0117",
CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase);
+ IndexOf ("ABCABC", "BC", CompareOptions.IgnoreCase);
+ IndexOf ("BBCBBC", "BC", CompareOptions.IgnoreCase);
- DumpSortKey ("AE");
- DumpSortKey ("\u00C6");
+ LastIndexOf ("ABC", "1", CompareOptions.None);
+ LastIndexOf ("ABCABC", "c", CompareOptions.IgnoreCase);
+ LastIndexOf ("ABCABC", "\uFF22",
CompareOptions.IgnoreCase | CompareOptions.IgnoreWidth);
+ LastIndexOf ("ABCDE", "\u0117",
CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase);
+ LastIndexOf ("ABCABC", "BC", CompareOptions.IgnoreCase);
+ LastIndexOf ("BBCBBC", "BC", CompareOptions.IgnoreCase);
/*
// dump sortkey for every single character.
@@ -75,10 +97,21 @@
coll.Compare (s1, s2, opt), s1, s2);
}
+ void Compare (string s1, int idx1, int len1, string s2, int
idx2, int len2, CompareOptions opt)
+ {
+ Console.Error.WriteLine ("{0} {1} / {2}",
+ coll.Compare (s1, idx1, len1, s2, idx2, len2,
opt), s1, s2);
+ }
+
void IndexOf (string s, char c, CompareOptions opt)
{
+ IndexOf (s, c, 0, s.Length, opt);
+ }
+
+ void IndexOf (string s, char c, int idx, int len,
CompareOptions opt)
+ {
Console.Error.WriteLine ("cIndex: {0} {1} / {2}",
- coll.IndexOf (s, c, opt), s, c);
+ coll.IndexOf (s, c, idx, len, opt), s, c);
}
void IndexOf (string s1, string s2, CompareOptions opt)
@@ -93,9 +126,32 @@
coll.IsPrefix (s1, s2, opt), s1, s2);
}
+ void LastIndexOf (string s, char c, CompareOptions opt)
+ {
+ Console.Error.WriteLine ("cLast: {0} {1} / {2}",
+ coll.LastIndexOf (s, c, opt), s, c);
+ }
+
+ void LastIndexOf (string s1, string s2, CompareOptions opt)
+ {
+ Console.Error.WriteLine ("sLast: {0} {1} / {2}",
+ coll.LastIndexOf (s1, s2, opt), s1, s2);
+ }
+
+ void IsSuffix (string s1, string s2, CompareOptions opt)
+ {
+ Console.Error.WriteLine ("IsSuffix: {0} {1} / {2}",
+ coll.IsSuffix (s1, s2, opt), s1, s2);
+ }
+
void DumpSortKey (string s)
{
- byte [] data = coll.GetSortKey (s).KeyData;
+ DumpSortKey (s, 0, s.Length, CompareOptions.None);
+ }
+
+ void DumpSortKey (string s, int idx, int len, CompareOptions
opt)
+ {
+ byte [] data = coll.GetSortKey (s, idx, len,
opt).KeyData;
foreach (byte b in data)
Console.Error.Write ("{0:X02} ", b);
Console.Error.WriteLine (" : {0}", s);
_______________________________________________
Mono-patches maillist - [email protected]
http://lists.ximian.com/mailman/listinfo/mono-patches