[ 
https://issues.apache.org/jira/browse/ARROW-2454?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16439595#comment-16439595
 ] 

ASF GitHub Bot commented on ARROW-2454:
---------------------------------------

pitrou closed pull request #1897: ARROW-2454: [C++] Allow zero-array chunked arrays
URL: https://github.com/apache/arrow/pull/1897
 
 
   

This is a PR merged from a forked repository (a foreign pull request).
Because GitHub hides the original diff of such a pull request once it is
merged, the diff is reproduced below for the sake of provenance:

diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
index b1cf6e59a2..0b9f75df19 100644
--- a/cpp/src/arrow/table-test.cc
+++ b/cpp/src/arrow/table-test.cc
@@ -43,7 +43,9 @@ class TestChunkedArray : public TestBase {
  protected:
   virtual void Construct() {
     one_ = std::make_shared<ChunkedArray>(arrays_one_);
-    another_ = std::make_shared<ChunkedArray>(arrays_another_);
+    if (!arrays_another_.empty()) {
+      another_ = std::make_shared<ChunkedArray>(arrays_another_);
+    }
   }
 
   ArrayVector arrays_one_;
@@ -121,6 +123,23 @@ TEST_F(TestChunkedArray, SliceEquals) {
   std::shared_ptr<ChunkedArray> slice2 = one_->Slice(75)->Slice(25)->Slice(25, 50);
   ASSERT_EQ(slice2->length(), 50);
   test::AssertChunkedEqual(*slice, *slice2);
+
+  // Making empty slices of a ChunkedArray
+  std::shared_ptr<ChunkedArray> slice3 = one_->Slice(one_->length(), 99);
+  ASSERT_EQ(slice3->length(), 0);
+  ASSERT_EQ(slice3->num_chunks(), 0);
+  ASSERT_TRUE(slice3->type()->Equals(one_->type()));
+
+  std::shared_ptr<ChunkedArray> slice4 = one_->Slice(10, 0);
+  ASSERT_EQ(slice4->length(), 0);
+  ASSERT_EQ(slice4->num_chunks(), 0);
+  ASSERT_TRUE(slice4->type()->Equals(one_->type()));
+
+  // Slicing an empty ChunkedArray
+  std::shared_ptr<ChunkedArray> slice5 = slice4->Slice(0, 10);
+  ASSERT_EQ(slice5->length(), 0);
+  ASSERT_EQ(slice5->num_chunks(), 0);
+  ASSERT_TRUE(slice5->type()->Equals(one_->type()));
 }
 
 class TestColumn : public TestChunkedArray {
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index f6ac6dd3b1..8af47ea220 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -39,13 +39,25 @@ namespace arrow {
 ChunkedArray::ChunkedArray(const ArrayVector& chunks) : chunks_(chunks) {
   length_ = 0;
   null_count_ = 0;
+  DCHECK_GT(chunks.size(), 0)
+      << "cannot construct ChunkedArray from empty vector and omitted type";
+  type_ = chunks[0]->type();
   for (const std::shared_ptr<Array>& chunk : chunks) {
     length_ += chunk->length();
     null_count_ += chunk->null_count();
   }
 }
 
-std::shared_ptr<DataType> ChunkedArray::type() const { return chunks_[0]->type(); }
+ChunkedArray::ChunkedArray(const ArrayVector& chunks,
+                           const std::shared_ptr<DataType>& type)
+    : chunks_(chunks), type_(type) {
+  length_ = 0;
+  null_count_ = 0;
+  for (const std::shared_ptr<Array>& chunk : chunks) {
+    length_ += chunk->length();
+    null_count_ += chunk->null_count();
+  }
+}
 
 bool ChunkedArray::Equals(const ChunkedArray& other) const {
   if (length_ != other.length()) {
@@ -107,20 +119,20 @@ std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length
   DCHECK_LE(offset, length_);
 
   int curr_chunk = 0;
-  while (offset >= chunk(curr_chunk)->length()) {
+  while (curr_chunk < num_chunks() && offset >= chunk(curr_chunk)->length()) {
     offset -= chunk(curr_chunk)->length();
     curr_chunk++;
   }
 
   ArrayVector new_chunks;
-  while (length > 0 && curr_chunk < num_chunks()) {
+  while (curr_chunk < num_chunks() && length > 0) {
     new_chunks.push_back(chunk(curr_chunk)->Slice(offset, length));
     length -= chunk(curr_chunk)->length() - offset;
     offset = 0;
     curr_chunk++;
   }
 
-  return std::make_shared<ChunkedArray>(new_chunks);
+  return std::make_shared<ChunkedArray>(new_chunks, type_);
 }
 
 std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset) const {
@@ -129,15 +141,15 @@ std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset) const {
 
 Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks)
     : field_(field) {
-  data_ = std::make_shared<ChunkedArray>(chunks);
+  data_ = std::make_shared<ChunkedArray>(chunks, field->type());
 }
 
 Column::Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>& data)
     : field_(field) {
   if (!data) {
-    data_ = std::make_shared<ChunkedArray>(ArrayVector({}));
+    data_ = std::make_shared<ChunkedArray>(ArrayVector({}), field->type());
   } else {
-    data_ = std::make_shared<ChunkedArray>(ArrayVector({data}));
+    data_ = std::make_shared<ChunkedArray>(ArrayVector({data}), field->type());
   }
 }
 
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index 20d027d6a5..32af224ff4 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -40,6 +40,7 @@ class Status;
 class ARROW_EXPORT ChunkedArray {
  public:
   explicit ChunkedArray(const ArrayVector& chunks);
+  ChunkedArray(const ArrayVector& chunks, const std::shared_ptr<DataType>& type);
 
   /// \return the total length of the chunked array; computed on construction
   int64_t length() const { return length_; }
@@ -68,7 +69,7 @@ class ARROW_EXPORT ChunkedArray {
   /// \brief Slice from offset until end of the chunked array
   std::shared_ptr<ChunkedArray> Slice(int64_t offset) const;
 
-  std::shared_ptr<DataType> type() const;
+  std::shared_ptr<DataType> type() const { return type_; }
 
   bool Equals(const ChunkedArray& other) const;
   bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
@@ -77,6 +78,7 @@ class ARROW_EXPORT ChunkedArray {
   ArrayVector chunks_;
   int64_t length_;
   int64_t null_count_;
+  std::shared_ptr<DataType> type_;
 
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index ce213b9995..e50760b55d 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -140,8 +140,8 @@ class ARROW_EXPORT DataType {
 
   // Return whether the types are equal
   //
-  // Types that are logically convertable from one to another e.g. List<UInt8>
-  // and Binary are NOT equal).
+  // Types that are logically convertible from one to another (e.g. List<UInt8>
+  // and Binary) are NOT equal.
   virtual bool Equals(const DataType& other) const;
   bool Equals(const std::shared_ptr<DataType>& other) const;
 
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 81564352b1..5303cb2190 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -44,6 +44,10 @@ def test_chunked_array_getitem():
     data_slice = data[4:-1]
     assert data_slice.to_pylist() == [5]
 
+    data_slice = data[99:99]
+    assert data_slice.type == data.type
+    assert data_slice.to_pylist() == []
+
 
 def test_column_basics():
     data = [


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [Python] Empty chunked array slice crashes
> ------------------------------------------
>
>                 Key: ARROW-2454
>                 URL: https://issues.apache.org/jira/browse/ARROW-2454
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Python
>    Affects Versions: 0.9.0
>            Reporter: Antoine Pitrou
>            Assignee: Antoine Pitrou
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 0.10.0
>
>
> {code:python}
> >>> col = pa.Column.from_array('ints', pa.array([1,2,3]))
> >>> col
> <pyarrow.lib.Column object at 0x7f65398fff00>
> chunk 0: <pyarrow.lib.Int64Array object at 0x7f64fd13ab88>
> [
>   1,
>   2,
>   3
> ]
> >>> col.data
> <pyarrow.lib.ChunkedArray at 0x7f653986ef00>
> >>> col.data[:1]
> <pyarrow.lib.ChunkedArray at 0x7f6539884720>
> >>> col.data[:0]
> Erreur de segmentation (core dumped)
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to