[ https://issues.apache.org/jira/browse/THRIFT-4207?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16693774#comment-16693774 ]
ASF GitHub Bot commented on THRIFT-4207: ---------------------------------------- jeking3 closed pull request #1274: THRIFT-4207: Make sure Python Accelerated protocol does not allow invalid UTF-8 URL: https://github.com/apache/thrift/pull/1274 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/lib/py/src/ext/protocol.tcc b/lib/py/src/ext/protocol.tcc index c025d0c968..e2e782a6c1 100644 --- a/lib/py/src/ext/protocol.tcc +++ b/lib/py/src/ext/protocol.tcc @@ -419,6 +419,8 @@ bool ProtocolBase<Impl>::encodeValue(PyObject* value, TType type, PyObject* type case T_STRING: { ScopedPyObject nval; + Py_ssize_t len; + char *str; if (PyUnicode_Check(value)) { nval.reset(PyUnicode_AsUTF8String(value)); @@ -426,11 +428,21 @@ bool ProtocolBase<Impl>::encodeValue(PyObject* value, TType type, PyObject* type return false; } } else { + if (isUtf8(typeargs)) { + if (PyBytes_AsStringAndSize(value, &str, &len) < 0) { + return false; + } + // Check that input is a valid UTF-8 string. 
+ nval.reset(PyUnicode_DecodeUTF8(str, len, 0)); + if (!nval) { + return false; + } + } Py_INCREF(value); nval.reset(value); } - Py_ssize_t len = PyBytes_Size(nval.get()); + len = PyBytes_Size(nval.get()); if (!detail::check_ssize_t_32(len)) { return false; } diff --git a/lib/py/src/protocol/TProtocol.py b/lib/py/src/protocol/TProtocol.py index fd20cb7906..588d997e57 100644 --- a/lib/py/src/protocol/TProtocol.py +++ b/lib/py/src/protocol/TProtocol.py @@ -118,6 +118,8 @@ def writeDouble(self, dub): pass def writeString(self, str_val): + if isinstance(str_val, bytes): + str_val = str_val.decode('utf8') self.writeBinary(str_to_binary(str_val)) def writeBinary(self, str_val): diff --git a/test/py/FastbinaryTest.py b/test/py/FastbinaryTest.py index 05c0bb6d15..2a87d5fddc 100755 --- a/test/py/FastbinaryTest.py +++ b/test/py/FastbinaryTest.py @@ -74,6 +74,9 @@ def isOpen(self): u"\x20\xce\x91\x74\x74\xce\xb1\xe2\x85\xbd\xce\xba"\ u"\xc7\x83\xe2\x80\xbc" +ooe_bad = OneOfEach() +ooe_bad.zomg_unicode = b'\xbe\xef\xff' + if sys.version_info[0] == 2 and os.environ.get('THRIFT_TEST_PY_NO_UTF8STRINGS'): ooe1.zomg_unicode = ooe1.zomg_unicode.encode('utf8') ooe2.zomg_unicode = ooe2.zomg_unicode.encode('utf8') @@ -167,6 +170,27 @@ def _check_read(self, o): pprint(repr(o)) raise Exception('read value mismatch') + def _check_bad_unicode(self, o): + if (sys.version_info[0] == 2 and + os.environ.get('THRIFT_TEST_PY_NO_UTF8STRINGS')): + return + + try: + prot_slow = self._slow(TTransport.TMemoryBuffer()) + o.write(prot_slow) + except UnicodeError: + pass + else: + raise Exception('UnicodeError not raised') + + try: + prot_fast = self._fast(TTransport.TMemoryBuffer()) + o.write(prot_fast) + except UnicodeError: + pass + else: + raise Exception('UnicodeError not raised') + def do_test(self): self._check_write(HolyMoley()) self._check_read(HolyMoley()) @@ -188,6 +212,8 @@ def do_test(self): self._check_read(Backwards(**{"first_tag2": 4, "second_tag1": 2})) + 
self._check_bad_unicode(ooe_bad) + # One case where the serialized form changes, but only superficially. o = Backwards(**{"first_tag2": 4, "second_tag1": 2}) trans_fast = TTransport.TMemoryBuffer() ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Accelerated version of TBinaryProtocol allows invalid input to string fields. > ----------------------------------------------------------------------------- > > Key: THRIFT-4207 > URL: https://issues.apache.org/jira/browse/THRIFT-4207 > Project: Thrift > Issue Type: Bug > Components: Python - Library > Affects Versions: 0.10.0 > Reporter: Elvis Pranskevichus > Assignee: Aki Sukegawa > Priority: Major > > {{TBinaryProtocolAccelerated}} and {{TCompactProtocolAccelerated}} currently > accept arbitrary bytes as input to string fields even when {{py:utf8strings}} > is on. -- This message was sent by Atlassian JIRA (v7.6.3#76005)