This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
     new 830a2b1  ARROW-3338: [Python] Crash when schema and columns do not match
830a2b1 is described below

commit 830a2b1ccc23e8683864af3461ebf92ef159c604
Author: Krisztián Szűcs <szucs.kriszt...@gmail.com>
AuthorDate: Thu Sep 27 10:58:07 2018 -0400

    ARROW-3338: [Python] Crash when schema and columns do not match

    Author: Krisztián Szűcs <szucs.kriszt...@gmail.com>

    Closes #2643 from kszucs/ARROW-3338 and squashes the following commits:

    9389d608a <Krisztián Szűcs> make test case python27 compatible
    733e18fdd <Krisztián Szűcs> fix schema validation in Table::FromRecordBatches
---
 cpp/src/arrow/table.cc             |  2 +-
 python/pyarrow/tests/test_table.py | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 9919085..96c71c1 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -412,7 +412,7 @@ Status Table::FromRecordBatches(const std::shared_ptr<Schema>& schema,
   const int nbatches = static_cast<int>(batches.size());
   const int ncolumns = static_cast<int>(schema->num_fields());

-  for (int i = 1; i < nbatches; ++i) {
+  for (int i = 0; i < nbatches; ++i) {
     if (!batches[i]->schema()->Equals(*schema, false)) {
       std::stringstream ss;
       ss << "Schema at index " << static_cast<int>(i) << " was different: \n"
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index a6567d5..0b397f6 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -499,6 +499,27 @@ def test_recordbatchlist_schema_equals():
         pa.Table.from_batches([batch1, batch2])


+def test_table_from_batches_and_schema():
+    schema = pa.schema([
+        pa.field('a', pa.int64()),
+        pa.field('b', pa.float64()),
+    ])
+    batch = pa.RecordBatch.from_arrays([pa.array([1]), pa.array([3.14])],
+                                       names=['a', 'b'])
+    table = pa.Table.from_batches([batch], schema)
+    assert table.schema.equals(schema)
+    assert table.column(0) == pa.column('a', pa.array([1]))
+    assert table.column(1) == pa.column('b', pa.array([3.14]))
+
+    incompatible_schema = pa.schema([pa.field('a', pa.int64())])
+    with pytest.raises(pa.ArrowInvalid):
+        pa.Table.from_batches([batch], incompatible_schema)
+
+    incompatible_batch = pa.RecordBatch.from_arrays([pa.array([1])], ['a'])
+    with pytest.raises(pa.ArrowInvalid):
+        pa.Table.from_batches([incompatible_batch], schema)
+
+
 def test_table_to_batches():
     df1 = pd.DataFrame({'a': list(range(10))})
     df2 = pd.DataFrame({'a': list(range(10, 30))})