Hey guys,

the System.Uri constructor sometimes produces a wrong result for UTF-8
strings that need encoding, as shown in the following code snippet:

<SNIP>
using System;
// Minimal reproduction: builds a System.Uri from two file URIs that differ
// only in one character (ASCII 'x' vs. non-ASCII 'ü') and prints each result.
public class Test
{
        // Constructs a Uri from each sample string, in order, and writes its
        // ToString () form to standard output, one per line.
        public static void Main()
        {
                // Both samples already contain the escape sequence "%23".
                string[] samples = {
                        "file:///tmp/x (%232).jpg",
                        "file:///tmp/ü (%232).jpg",
                };

                foreach (string sample in samples) {
                        Uri uri = new Uri (sample);
                        Console.WriteLine (uri.ToString ());
                }
        }
}
<SNAP>

returns:

file:///tmp/x (%232).jpg
file:///tmp/ü (%25232).jpg

which is wrong: in the second line the already-escaped "%23" has been
escaped a second time, giving "%2523".

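The culprit is the (internal) EscapeString method, which iterates over a
byte[] (the UTF-8 encoding of the string) but checks for IsHexEncoding, and
then copies the escape sequence, using that same index on the String itself.
For multibyte encodings (like UTF-8) the byte index and the char index can
get out of sync.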

I attached a patch, please review.

regards

Stephane
Index: System/Uri.cs
===================================================================
--- System/Uri.cs	(revision 96651)
+++ System/Uri.cs	(working copy)
@@ -1074,12 +1074,9 @@
 			if (str == null)
 				return String.Empty;
 			
-			byte [] data = Encoding.UTF8.GetBytes (str);
 			StringBuilder s = new StringBuilder ();
-			int len = data.Length;	
+			int len = str.Length;	
 			for (int i = 0; i < len; i++) {
-				char c = (char) data [i];
-				// reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
 				// mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 				// control     = <US-ASCII coded characters 00-1F and 7F hexadecimal>
 				// space       = <US-ASCII coded character 20 hexadecimal>
@@ -1090,26 +1087,28 @@
 				// i.e. for encoding that follows the pattern 
 				// "%hexhex" in a string, where "hex" is a digit from 0-9 
 				// or a letter from A-F (case-insensitive).
-				if('%' == c && IsHexEncoding(str,i))
-				{
+				if (IsHexEncoding (str,i)) {
 					// if ,yes , copy it as is
-					s.Append(c);
-					s.Append(str[++i]);
-					s.Append(str[++i]);
+					s.Append(str.Substring (i, 3));
+					i += 2;
 					continue;
 				}
 
-				if ((c <= 0x20) || (c >= 0x7f) || 
-				    ("<>%\"{}|\\^`".IndexOf (c) != -1) ||
-				    (escapeHex && (c == '#')) ||
-				    (escapeBrackets && (c == '[' || c == ']')) ||
-				    (escapeReserved && (";/?:@&=+$,".IndexOf (c) != -1))) {
-					s.Append (HexEscape (c));
-					continue;
+				byte [] data = Encoding.UTF8.GetBytes (new char[] {str[i]});
+				int length = data.Length;
+				for (int j = 0; j < length; j++) {
+					char c = (char) data [j];
+					// reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
+					if ((c <= 0x20) || (c >= 0x7f) || 
+					    ("<>%\"{}|\\^`".IndexOf (c) != -1) ||
+					    (escapeHex && (c == '#')) ||
+					    (escapeBrackets && (c == '[' || c == ']')) ||
+					    (escapeReserved && (";/?:@&=+$,".IndexOf (c) != -1))) {
+						s.Append (HexEscape (c));
+						continue;
+					}	
+					s.Append (c);
 				}
-				
-					
-				s.Append (c);
 			}
 			
 			return s.ToString ();
_______________________________________________
Mono-devel-list mailing list
Mono-devel-list@lists.ximian.com
http://lists.ximian.com/mailman/listinfo/mono-devel-list

Reply via email to