Author: davisp
Date: Thu Jan 20 01:15:55 2011
New Revision: 1061088

URL: http://svn.apache.org/viewvc?rev=1061088&view=rev
Log:
Fix bug that allows invalid UTF-8 after valid escapes.

Merges r991073 from trunk to branches/1.0.x
Fixes COUCHDB-875


Modified:
    couchdb/branches/1.0.x/   (props changed)
    couchdb/branches/1.0.x/etc/default/couchdb   (props changed)
    couchdb/branches/1.0.x/src/mochiweb/mochijson2.erl

Propchange: couchdb/branches/1.0.x/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jan 20 01:15:55 2011
@@ -6,4 +6,4 @@
 /couchdb/branches/list-iterator:782292-784593
 /couchdb/branches/tail_header:775760-778477
 /couchdb/tags/0.10.0:825400
-/couchdb/trunk:984170,984178,984214,984228,984237,984241,984501,1045203
+/couchdb/trunk:984170,984178,984214,984228,984237,984241,984501,991073,1045203

Propchange: couchdb/branches/1.0.x/etc/default/couchdb
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jan 20 01:15:55 2011
@@ -6,5 +6,5 @@
 /couchdb/branches/list-iterator/etc/default/couchdb:782292-784593
 /couchdb/branches/tail_header/etc/default/couchdb:775760-778477
 /couchdb/tags/0.10.0/etc/default/couchdb:825400
-/couchdb/trunk/etc/default/couchdb:984170,984178,984214,984228,984237,984241,984501,1045203
+/couchdb/trunk/etc/default/couchdb:984170,984178,984214,984228,984237,984241,984501,991073,1045203
 /incubator/couchdb/trunk/etc/default/couchdb:642419-694440

Modified: couchdb/branches/1.0.x/src/mochiweb/mochijson2.erl
URL: http://svn.apache.org/viewvc/couchdb/branches/1.0.x/src/mochiweb/mochijson2.erl?rev=1061088&r1=1061087&r2=1061088&view=diff
==============================================================================
--- couchdb/branches/1.0.x/src/mochiweb/mochijson2.erl (original)
+++ couchdb/branches/1.0.x/src/mochiweb/mochijson2.erl Thu Jan 20 01:15:55 2011
@@ -405,8 +405,22 @@ tokenize_string(B, S=#decoder{offset=O},
                 Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc),
                 tokenize_string(B, ?ADV_COL(S, 6), Acc1)
             end;
-        <<_:O/binary, C, _/binary>> ->
-            tokenize_string(B, ?INC_CHAR(S, C), [C | Acc])
+        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
+            tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]);
+        <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
+                C2 >= 128, C2 =< 191 ->
+            tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]);
+        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191 ->
+            tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]);
+        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191,
+                C4 >= 128, C4 =< 191 ->
+            tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]);
+        _ ->
+            throw(invalid_utf8)
     end.
 
 tokenize_number(B, S) ->
@@ -653,7 +667,9 @@ test_input_validation() ->
         <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
         <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
         % we don't support code points > 10FFFF per RFC 3629
-        <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>
+        <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>,
+        %% escape characters trigger a different code path
+        <<?Q, $\\, $\n, 16#80, ?Q>>
     ],
     lists:foreach(fun(X) ->
         ok = try decode(X) catch invalid_utf8 -> ok end


Reply via email to