Hey guys, the System.Uri ctor sometimes gives a wrong result for strings that contain multibyte (in UTF-8) characters and need encoding, as shown in the following code snippet:
<SNIP> using System; public class Test { public static void Main() { string ok = "file:///tmp/x (%232).jpg"; string nok = "file:///tmp/ü (%232).jpg"; Console.WriteLine (new Uri (ok).ToString ()); Console.WriteLine (new Uri (nok).ToString ()); } } <SNAP> returns: file:///tmp/x (%232).jpg file:///tmp/ü (%25232).jpg which is wrong: in the second URI the already-escaped "%23" is escaped again as "%2523". The culprit is the (internal) EscapeString method, which iterates over a byte[] but checks (and then copies) the hex-encoded sequence via IsHexEncoding on the String itself. For multibyte encodings (like UTF-8), the byte index and the string index can get out of sync. I have attached a patch; please review. Regards, Stephane
Index: System/Uri.cs =================================================================== --- System/Uri.cs (revision 96651) +++ System/Uri.cs (working copy) @@ -1074,12 +1074,9 @@ if (str == null) return String.Empty; - byte [] data = Encoding.UTF8.GetBytes (str); StringBuilder s = new StringBuilder (); - int len = data.Length; + int len = str.Length; for (int i = 0; i < len; i++) { - char c = (char) data [i]; - // reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," // mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" // control = <US-ASCII coded characters 00-1F and 7F hexadecimal> // space = <US-ASCII coded character 20 hexadecimal> @@ -1090,26 +1087,28 @@ // i.e. for encoding that follows the pattern // "%hexhex" in a string, where "hex" is a digit from 0-9 // or a letter from A-F (case-insensitive). - if('%' == c && IsHexEncoding(str,i)) - { + if (IsHexEncoding (str,i)) { // if ,yes , copy it as is - s.Append(c); - s.Append(str[++i]); - s.Append(str[++i]); + s.Append(str.Substring (i, 3)); + i += 2; continue; } - if ((c <= 0x20) || (c >= 0x7f) || - ("<>%\"{}|\\^`".IndexOf (c) != -1) || - (escapeHex && (c == '#')) || - (escapeBrackets && (c == '[' || c == ']')) || - (escapeReserved && (";/?:@&=+$,".IndexOf (c) != -1))) { - s.Append (HexEscape (c)); - continue; + byte [] data = Encoding.UTF8.GetBytes (new char[] {str[i]}); + int length = data.Length; + for (int j = 0; j < length; j++) { + char c = (char) data [j]; + // reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," + if ((c <= 0x20) || (c >= 0x7f) || + ("<>%\"{}|\\^`".IndexOf (c) != -1) || + (escapeHex && (c == '#')) || + (escapeBrackets && (c == '[' || c == ']')) || + (escapeReserved && (";/?:@&=+$,".IndexOf (c) != -1))) { + s.Append (HexEscape (c)); + continue; + } + s.Append (c); } - - - s.Append (c); } return s.ToString ();
_______________________________________________ Mono-devel-list mailing list Mono-devel-list@lists.ximian.com http://lists.ximian.com/mailman/listinfo/mono-devel-list