[ https://issues.apache.org/jira/browse/ARROW-2195?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Philipp Moritz updated ARROW-2195: ---------------------------------- Description: It can be reproduced with the following script: {code:java} import pyarrow as pa import pyarrow.plasma as plasma def retrieve1(): client = plasma.connect('test', "", 0) key = "keynumber1keynumber1" pid = plasma.ObjectID(bytearray(key,'UTF-8')) [buff] = client .get_buffers([pid]) batch = pa.RecordBatchStreamReader(buff).read_next_batch() print(batch) print(batch.schema) print(batch[0]) return batch client = plasma.connect('test', "", 0) test1 = [1, 12, 23, 3, 21, 34] test1 = pa.array(test1, pa.int32()) batch = pa.RecordBatch.from_arrays([test1], ['FIELD1']) key = "keynumber1keynumber1" pid = plasma.ObjectID(bytearray(key,'UTF-8')) sink = pa.MockOutputStream() stream_writer = pa.RecordBatchStreamWriter(sink, batch.schema) stream_writer.write_batch(batch) stream_writer.close() bff = client.create(pid, sink.size()) stream = pa.FixedSizeBufferWriter(bff) writer = pa.RecordBatchStreamWriter(stream, batch.schema) writer.write_batch(batch) client.seal(pid) batch = retrieve1() print(batch) print(batch.schema) print(batch[0]) {code} Preliminary backtrace: ``` EXC_BAD_ACCESS (code=1, address=0x111138158) frame #0: 0x000000010e6457fc lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py(_object*, _object*) + 28 lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py: -> 0x10e6457fc <+28>: movslq (%rdx,%rcx,4), %rdi 0x10e645800 <+32>: callq 0x10e698170 ; symbol stub for: PyInt_FromLong 0x10e645805 <+37>: testq %rax, %rax 0x10e645808 <+40>: je 0x10e64580c ; <+44> (lldb) bt * thread #1: tid = 0xf1378e, 0x000000010e6457fc lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py(_object*, _object*) + 28, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x111138158) * frame #0: 0x000000010e6457fc lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py(_object*, _object*) + 28 frame #1: 0x000000010e5ccd35 lib.so`__Pyx_PyObject_CallNoArg(_object*) + 133 frame #2: 0x000000010e613b25 
lib.so`__pyx_pw_7pyarrow_3lib_10ArrayValue_3__repr__(_object*) + 933 frame #3: 0x000000010c2f83bc libpython2.7.dylib`PyObject_Repr + 60 frame #4: 0x000000010c35f651 libpython2.7.dylib`PyEval_EvalFrameEx + 22305 ``` was: It can be reproduced with the following script: ``` import pyarrow as pa import pyarrow.plasma as plasma def retrieve1(): client = plasma.connect('test', "", 0) key = "keynumber1keynumber1" pid = plasma.ObjectID(bytearray(key,'UTF-8')) [buff] = client .get_buffers([pid]) batch = pa.RecordBatchStreamReader(buff).read_next_batch() print(batch) print(batch.schema) print(batch[0]) return batch client = plasma.connect('test', "", 0) test1 = [1, 12, 23, 3, 21, 34] test1 = pa.array(test1, pa.int32()) batch = pa.RecordBatch.from_arrays([test1], ['FIELD1']) key = "keynumber1keynumber1" pid = plasma.ObjectID(bytearray(key,'UTF-8')) sink = pa.MockOutputStream() stream_writer = pa.RecordBatchStreamWriter(sink, batch.schema) stream_writer.write_batch(batch) stream_writer.close() bff = client.create(pid, sink.size()) stream = pa.FixedSizeBufferWriter(bff) writer = pa.RecordBatchStreamWriter(stream, batch.schema) writer.write_batch(batch) client.seal(pid) batch = retrieve1() print(batch) print(batch.schema) print(batch[0]) ``` Preliminary backtrace: ``` EXC_BAD_ACCESS (code=1, address=0x111138158) frame #0: 0x000000010e6457fc lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py(_object*, _object*) + 28 lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py: -> 0x10e6457fc <+28>: movslq (%rdx,%rcx,4), %rdi 0x10e645800 <+32>: callq 0x10e698170 ; symbol stub for: PyInt_FromLong 0x10e645805 <+37>: testq %rax, %rax 0x10e645808 <+40>: je 0x10e64580c ; <+44> (lldb) bt * thread #1: tid = 0xf1378e, 0x000000010e6457fc lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py(_object*, _object*) + 28, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x111138158) * frame #0: 0x000000010e6457fc lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py(_object*, _object*) + 28 
frame #1: 0x000000010e5ccd35 lib.so`__Pyx_PyObject_CallNoArg(_object*) + 133 frame #2: 0x000000010e613b25 lib.so`__pyx_pw_7pyarrow_3lib_10ArrayValue_3__repr__(_object*) + 933 frame #3: 0x000000010c2f83bc libpython2.7.dylib`PyObject_Repr + 60 frame #4: 0x000000010c35f651 libpython2.7.dylib`PyEval_EvalFrameEx + 22305 ``` > [Plasma] Segfault when retrieving RecordBatch from plasma store > --------------------------------------------------------------- > > Key: ARROW-2195 > URL: https://issues.apache.org/jira/browse/ARROW-2195 > Project: Apache Arrow > Issue Type: Improvement > Reporter: Philipp Moritz > Priority: Major > > It can be reproduced with the following script: > {code:java} > import pyarrow as pa > import pyarrow.plasma as plasma > def retrieve1(): > client = plasma.connect('test', "", 0) > key = "keynumber1keynumber1" > pid = plasma.ObjectID(bytearray(key,'UTF-8')) > [buff] = client .get_buffers([pid]) > batch = pa.RecordBatchStreamReader(buff).read_next_batch() > print(batch) > print(batch.schema) > print(batch[0]) > return batch > client = plasma.connect('test', "", 0) > test1 = [1, 12, 23, 3, 21, 34] > test1 = pa.array(test1, pa.int32()) > batch = pa.RecordBatch.from_arrays([test1], ['FIELD1']) > key = "keynumber1keynumber1" > pid = plasma.ObjectID(bytearray(key,'UTF-8')) > sink = pa.MockOutputStream() > stream_writer = pa.RecordBatchStreamWriter(sink, batch.schema) > stream_writer.write_batch(batch) > stream_writer.close() > bff = client.create(pid, sink.size()) > stream = pa.FixedSizeBufferWriter(bff) > writer = pa.RecordBatchStreamWriter(stream, batch.schema) > writer.write_batch(batch) > client.seal(pid) > batch = retrieve1() > print(batch) > print(batch.schema) > print(batch[0]) > {code} > > Preliminary backtrace: > > ``` > EXC_BAD_ACCESS (code=1, address=0x111138158) > frame #0: 0x000000010e6457fc > lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py(_object*, _object*) + 28 > lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py: > -> 
0x10e6457fc <+28>: movslq (%rdx,%rcx,4), %rdi > 0x10e645800 <+32>: callq 0x10e698170 ; symbol stub for: > PyInt_FromLong > 0x10e645805 <+37>: testq %rax, %rax > 0x10e645808 <+40>: je 0x10e64580c ; <+44> > (lldb) bt > * thread #1: tid = 0xf1378e, 0x000000010e6457fc > lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py(_object*, _object*) + 28, > queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, > address=0x111138158) > * frame #0: 0x000000010e6457fc > lib.so`__pyx_pw_7pyarrow_3lib_10Int32Value_1as_py(_object*, _object*) + 28 > frame #1: 0x000000010e5ccd35 lib.so`__Pyx_PyObject_CallNoArg(_object*) + > 133 > frame #2: 0x000000010e613b25 > lib.so`__pyx_pw_7pyarrow_3lib_10ArrayValue_3__repr__(_object*) + 933 > frame #3: 0x000000010c2f83bc libpython2.7.dylib`PyObject_Repr + 60 > frame #4: 0x000000010c35f651 libpython2.7.dylib`PyEval_EvalFrameEx + 22305 > ``` -- This message was sent by Atlassian JIRA (v7.6.3#76005)