pitrou commented on a change in pull request #7456: URL: https://github.com/apache/arrow/pull/7456#discussion_r442545757
##########
File path: python/pyarrow/tests/test_io.py
##########

@@ -1289,6 +1290,56 @@ def test_compressed_recordbatch_stream(compression):
     assert got_table == table
+# ----------------------------------------------------------------------
+# Transform input streams
+
+unicode_transcoding_example = (
+    "Dès Noël où un zéphyr haï me vêt de glaçons würmiens "
+    "je dîne d’exquis rôtis de bœuf au kir à l’aÿ d’âge mûr & cætera !"
+)
+
+
+def check_transcoding(data, src_encoding, dest_encoding, chunk_sizes):
+    chunk_sizes = iter(chunk_sizes)
+    stream = pa.transcoding_input_stream(
+        pa.BufferReader(data.encode(src_encoding)),
+        src_encoding, dest_encoding)
+    out = []
+    while True:
+        buf = stream.read(next(chunk_sizes))
+        out.append(buf)
+        if not buf:
+            break
+    out = b''.join(out)
+    assert out.decode(dest_encoding) == data
+
+
+@pytest.mark.parametrize('src_encoding, dest_encoding',
+                         [('utf-8', 'utf-16'),
+                          ('utf-16', 'utf-8'),
+                          ('utf-8', 'utf-32-le'),
+                          ('utf-8', 'utf-32-be'),
+                          ])
+def test_transcoding_input_stream(src_encoding, dest_encoding):
+    # All at once
+    check_transcoding(unicode_transcoding_example,
+                      src_encoding, dest_encoding, [1000, 0])
+    # Incremental
+    check_transcoding(unicode_transcoding_example,

Review comment:
TODO: should perhaps exercise encoding errors

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at: us...@infra.apache.org