Author: Ronan Lamy <[email protected]>
Branch: py3.6
Changeset: r97798:1c5e6646df47
Date: 2019-10-16 17:57 +0100
http://bitbucket.org/pypy/pypy/changeset/1c5e6646df47/

Log:    Fix handling escape characters in HZ codec (bpo-30003)

diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
@@ -335,14 +335,16 @@
         DBCHAR code;
 
         if (c < 0x80) {
-            if (state->i == 0) {
-                WRITE1((unsigned char)c)
-                NEXT(1, 1)
+            if (state->i) {
+                WRITE2('~', '}');
+                NEXT_OUT(2);
+                state->i = 0;
             }
-            else {
-                WRITE3('~', '}', (unsigned char)c)
-                NEXT(1, 3)
-                state->i = 0;
+            WRITE1((unsigned char)c);
+            NEXT(1, 1);
+            if (c == '~') {
+                WRITE1('~');
+                NEXT_OUT(1);
             }
             continue;
         }
@@ -390,17 +392,15 @@
             unsigned char c2 = IN2;
 
             REQUIRE_INBUF(2)
-            if (c2 == '~') {
+            if (c2 == '~' && state->i == 0) {
                 WRITE1('~')
-                NEXT(2, 1)
-                continue;
             }
             else if (c2 == '{' && state->i == 0)
                 state->i = 1; /* set GB */
+            else if (c2 == '\n' && state->i == 0)
+                ; /* line-continuation */
             else if (c2 == '}' && state->i == 1)
                 state->i = 0; /* set ASCII */
-            else if (c2 == '\n')
-                ; /* line-continuation */
             else
                 return 1;
             NEXT(2, 0);
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -109,6 +109,9 @@
     assert s == 'foobar' and type(s) is str
     s = encode(c, u'\u5f95\u6cef'.encode('utf8'), 2)
     assert s == '~{abc}~}'
+    # bpo-30003
+    s = encode(c, 'ab~cd', 5)
+    assert s == 'ab~~cd'
 
 def test_encode_hz_error():
     # error
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to