Author: abock
Date: 2005-11-27 22:02:49 -0500 (Sun, 27 Nov 2005)
New Revision: 53533

Modified:
   trunk/entagged-sharp/ChangeLog
   trunk/entagged-sharp/src/EncodingInfo.cs
Log:
2005-11-27  Aaron Bockover  <[EMAIL PROTECTED]>

    * src/EncodingInfo.cs: UnicodeValidator class to validate a string as
    UTF-8; by Ruben Vermeersch



Modified: trunk/entagged-sharp/ChangeLog
===================================================================
--- trunk/entagged-sharp/ChangeLog      2005-11-28 00:45:25 UTC (rev 53532)
+++ trunk/entagged-sharp/ChangeLog      2005-11-28 03:02:49 UTC (rev 53533)
@@ -1,3 +1,8 @@
+2005-11-27  Aaron Bockover  <[EMAIL PROTECTED]>
+
+       * src/EncodingInfo.cs: UnicodeValidator class to validate a string as
+       UTF-8; by Ruben Vermeersch
+
 2005-11-24  Aaron Bockover  <[EMAIL PROTECTED]>
 
        * vacuum.cs: Runs make maintainer-clean and removes extra files

Modified: trunk/entagged-sharp/src/EncodingInfo.cs
===================================================================
--- trunk/entagged-sharp/src/EncodingInfo.cs    2005-11-28 00:45:25 UTC (rev 53532)
+++ trunk/entagged-sharp/src/EncodingInfo.cs    2005-11-28 03:02:49 UTC (rev 53533)
@@ -102,4 +102,104 @@
             return sb.ToString().Substring(0,sb.Length-1);
         }
     }
+/// <summary>
+/// Checks raw byte sequences for well-formed UTF-8.
+/// </summary>
+public static class UnicodeValidator
+{
+    /// <summary>
+    /// Determines whether a byte array holds well-formed UTF-8.
+    /// </summary>
+    /// <remarks>
+    /// Rejected input: stray continuation bytes, truncated sequences, overlong
+    /// encodings, lead bytes of 0xF8 and above, and any code point refused by
+    /// <see cref="IsValidUnicode"/>. An empty array is considered valid.
+    /// </remarks>
+    /// <param name="str">The bytes to examine.</param>
+    /// <returns>true if the whole array is valid UTF-8; otherwise false.</returns>
+    /// <exception cref="System.ArgumentNullException">
+    /// <paramref name="str"/> is null.
+    /// </exception>
+    public static bool ValidateUtf8(byte [] str)
+    {
+        if(str == null) {
+            throw new ArgumentNullException("str");
+        }
+
+        int i = 0;
+
+        while(i < str.Length) {
+            int lead = str[i++];
+            int trailing, min, val;
+
+            if(lead < 0x80) { /* 0xxxxxxx */
+                continue;
+            } else if((lead & 0xe0) == 0xc0) { /* 110xxxxx */
+                trailing = 1;
+                min = 1 << 7;
+                val = lead & 0x1f;
+            } else if((lead & 0xf0) == 0xe0) { /* 1110xxxx */
+                trailing = 2;
+                min = 1 << 11;
+                val = lead & 0x0f;
+            } else if((lead & 0xf8) == 0xf0) { /* 11110xxx */
+                trailing = 3;
+                min = 1 << 16;
+                val = lead & 0x07;
+            } else {
+                /* Continuation byte in lead position, or 0xF8 and above. */
+                return false;
+            }
+
+            /* IsContinuationChar also fails when the array ends early, so a
+               truncated sequence is rejected without relying on an exception. */
+            for(; trailing > 0; trailing--) {
+                if(!IsContinuationChar(str, i++, ref val)) {
+                    return false;
+                }
+            }
+
+            /* val < min means a shorter encoding exists (overlong form). */
+            if(val < min || !IsValidUnicode(val)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /// <summary>
+    /// Consumes one continuation byte (10xxxxxx) at index i, shifting its six
+    /// payload bits into val.
+    /// </summary>
+    /// <returns>
+    /// false, leaving val untouched, if i is past the end of the array or the
+    /// byte is not a continuation byte; otherwise true.
+    /// </returns>
+    private static bool IsContinuationChar(byte [] str, int i, ref int val)
+    {
+        if(i >= str.Length || (str[i] & 0xc0) != 0x80) { /* 10xxxxxx */
+            return false;
+        }
+
+        val = (val << 6) | (str[i] & 0x3f);
+
+        return true;
+    }
+
+    /// <summary>
+    /// Tells whether a decoded code point is accepted by this validator.
+    /// </summary>
+    /// <remarks>
+    /// Refuses values of 0x110000 and above, the surrogate range
+    /// 0xD800-0xDFFF, the range 0xFDD0-0xFDEF, and every value whose low
+    /// sixteen bits are 0xFFFE or 0xFFFF.
+    /// </remarks>
+    private static bool IsValidUnicode(int b)
+    {
+        return (b < 0x110000 &&
+            (b < 0xD800 || b > 0xDFFF) &&
+            (b < 0xFDD0 || b > 0xFDEF) &&
+            (b & 0xFFFE) != 0xFFFE);
+    }
 }

_______________________________________________
Mono-patches maillist  -  [email protected]
http://lists.ximian.com/mailman/listinfo/mono-patches

Reply via email to