Author: Ronan Lamy <[email protected]>
Branch: py3.6
Changeset: r97798:1c5e6646df47
Date: 2019-10-16 17:57 +0100
http://bitbucket.org/pypy/pypy/changeset/1c5e6646df47/
Log: Fix handling of escape characters in the HZ codec (bpo-30003)
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/_codecs_cn.c
@@ -335,14 +335,16 @@
DBCHAR code;
if (c < 0x80) {
- if (state->i == 0) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ if (state->i) {
+ WRITE2('~', '}');
+ NEXT_OUT(2);
+ state->i = 0;
}
- else {
- WRITE3('~', '}', (unsigned char)c)
- NEXT(1, 3)
- state->i = 0;
+ WRITE1((unsigned char)c);
+ NEXT(1, 1);
+ if (c == '~') {
+ WRITE1('~');
+ NEXT_OUT(1);
}
continue;
}
@@ -390,17 +392,15 @@
unsigned char c2 = IN2;
REQUIRE_INBUF(2)
- if (c2 == '~') {
+ if (c2 == '~' && state->i == 0) {
WRITE1('~')
- NEXT(2, 1)
- continue;
}
else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */
+ else if (c2 == '\n' && state->i == 0)
+ ; /* line-continuation */
else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */
- else if (c2 == '\n')
- ; /* line-continuation */
else
return 1;
NEXT(2, 0);
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -109,6 +109,9 @@
assert s == 'foobar' and type(s) is str
s = encode(c, u'\u5f95\u6cef'.encode('utf8'), 2)
assert s == '~{abc}~}'
+ # bpo-30003
+ s = encode(c, 'ab~cd', 5)
+ assert s == 'ab~~cd'
def test_encode_hz_error():
# error
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit