Re: [C++] Why are these two tables unequal?

2021-02-11 Thread Antoine Pitrou


Ok, it was a bug in RandomArrayGenerator::List().  I've pushed a fix to
your PR, sorry for that.

We'll need to add tests for random array generation :-)

Regards

Antoine.


Le 10/02/2021 à 18:49, Ying Zhou a écrit :
> Yup. That doesn’t change anything. I have just pushed this to 
> https://github.com/apache/arrow/pull/8648. Please take a look. Thanks a lot!
> 
> TEST(TestAdapterWriteNested, writeList) {
>   std::shared_ptr table_schema = schema({field("list", 
> list(int32()))});
>   int64_t num_rows = 1;
>   arrow::random::RandomArrayGenerator rand(kRandomSeed);
>   auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
>   std::shared_ptr array = rand.List(*value_array, num_rows + 1, 0.8);
>   std::shared_ptr chunked_array = 
> std::make_shared(array);
>   std::shared_ptr table = Table::Make(table_schema, {chunked_array});
> 
>   std::shared_ptr buffer_output_stream =
>   io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
> 15).ValueOrDie();
>   std::unique_ptr writer =
>   adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>   ARROW_EXPECT_OK(writer->Write(*table));
>   ARROW_EXPECT_OK(writer->Close());
>   std::shared_ptr buffer = 
> buffer_output_stream->Finish().ValueOrDie();
>   std::shared_ptr in_stream(new 
> io::BufferReader(buffer));
>   std::unique_ptr reader;
>   ARROW_EXPECT_OK(
>   adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
> &reader));
>   std::shared_ptr actual_output_table;
>   ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>   auto actual_array =
>   
> std::static_pointer_cast(actual_output_table->column(0)->chunk(0));
>   auto expected_array = 
> std::static_pointer_cast(table->column(0)->chunk(0));
>   AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
>   AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
>   AssertBufferEqual(*(actual_array->null_bitmap()), 
> *(expected_array->null_bitmap()));
>   ASSERT_TRUE(actual_array->type()->Equals(*(expected_array->type()), true));
>   RecordProperty("output_type", actual_array->type()->ToString());
>   RecordProperty("input_type", expected_array->type()->ToString());
>   RecordProperty("array_equality", actual_array->Equals(*expected_array));
> }
> 
>  timestamp="2021-02-10T12:46:13" classname="TestAdapterWriteNested">
> 
> 
> 
> 
> 
> 
>> On Feb 10, 2021, at 12:43 PM, Antoine Pitrou  wrote:
>>
>> check_metadata = true
> 
> 


Re: [C++] Why are these two tables unequal?

2021-02-10 Thread Ying Zhou
Yup. That doesn’t change anything. I have just pushed this to 
https://github.com/apache/arrow/pull/8648. Please take a look. Thanks a lot!

TEST(TestAdapterWriteNested, writeList) {
  std::shared_ptr table_schema = schema({field("list", list(int32()))});
  int64_t num_rows = 1;
  arrow::random::RandomArrayGenerator rand(kRandomSeed);
  auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
  std::shared_ptr array = rand.List(*value_array, num_rows + 1, 0.8);
  std::shared_ptr chunked_array = 
std::make_shared(array);
  std::shared_ptr table = Table::Make(table_schema, {chunked_array});

  std::shared_ptr buffer_output_stream =
  io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
15).ValueOrDie();
  std::unique_ptr writer =
  adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
  ARROW_EXPECT_OK(writer->Write(*table));
  ARROW_EXPECT_OK(writer->Close());
  std::shared_ptr buffer = buffer_output_stream->Finish().ValueOrDie();
  std::shared_ptr in_stream(new io::BufferReader(buffer));
  std::unique_ptr reader;
  ARROW_EXPECT_OK(
  adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
&reader));
  std::shared_ptr actual_output_table;
  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
  auto actual_array =
  
std::static_pointer_cast(actual_output_table->column(0)->chunk(0));
  auto expected_array = 
std::static_pointer_cast(table->column(0)->chunk(0));
  AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
  AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
  AssertBufferEqual(*(actual_array->null_bitmap()), 
*(expected_array->null_bitmap()));
  ASSERT_TRUE(actual_array->type()->Equals(*(expected_array->type()), true));
  RecordProperty("output_type", actual_array->type()->ToString());
  RecordProperty("input_type", expected_array->type()->ToString());
  RecordProperty("array_equality", actual_array->Equals(*expected_array));
}








> On Feb 10, 2021, at 12:43 PM, Antoine Pitrou  wrote:
> 
> check_metadata = true



Re: [C++] Why are these two tables unequal?

2021-02-10 Thread Antoine Pitrou


Just to be sure, have you tried to pass `check_metadata = true` as the
optional argument for `DataType::Equals`?

If that doesn't change anything, perhaps you can push your code
somewhere so that I (or someone else) can take a look.


Le 10/02/2021 à 18:39, Ying Zhou a écrit :
> Not really. So what’s really going on?!
> 
> TEST(TestAdapterWriteNested, writeList) {
>   std::shared_ptr table_schema = schema({field("list", 
> list(int32()))});
>   int64_t num_rows = 1;
>   arrow::random::RandomArrayGenerator rand(kRandomSeed);
>   auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
>   std::shared_ptr array = rand.List(*value_array, num_rows + 1, 0.8);
>   std::shared_ptr chunked_array = 
> std::make_shared(array);
>   std::shared_ptr table = Table::Make(table_schema, {chunked_array});
> 
>   std::shared_ptr buffer_output_stream =
>   io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
> 15).ValueOrDie();
>   std::unique_ptr writer =
>   adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>   ARROW_EXPECT_OK(writer->Write(*table));
>   ARROW_EXPECT_OK(writer->Close());
>   std::shared_ptr buffer = 
> buffer_output_stream->Finish().ValueOrDie();
>   std::shared_ptr in_stream(new 
> io::BufferReader(buffer));
>   std::unique_ptr reader;
>   ARROW_EXPECT_OK(
>   adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
> &reader));
>   std::shared_ptr actual_output_table;
>   ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>   auto actual_array =
>   
> std::static_pointer_cast(actual_output_table->column(0)->chunk(0));
>   auto expected_array = 
> std::static_pointer_cast(table->column(0)->chunk(0));
>   AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
>   AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
>   AssertBufferEqual(*(actual_array->null_bitmap()), 
> *(expected_array->null_bitmap()));
>   ASSERT_TRUE(actual_array->type()->Equals(*(expected_array->type())));
>   RecordProperty("output_type", actual_array->type()->ToString());
>   RecordProperty("input_type", expected_array->type()->ToString());
>   RecordProperty("array_equality", actual_array->Equals(*expected_array));
> }
> 
>  timestamp="2021-02-10T12:33:47" classname="TestAdapterWriteNested">
> 
> 
> 
> 
> 
> 
>> On Feb 10, 2021, at 12:10 PM, Antoine Pitrou  wrote:
>>
>>
>> Hmm, perhaps the types are unequal, then.  Can you print them out
>> (including field metadata)?
>>
>>
>> Le 10/02/2021 à 18:03, Ying Zhou a écrit :
>>> Thanks! Now we have an even weirder phenomenon. Even the null bitmaps and 
>>> offsets are equal. However the arrays aren’t! Does anyone know why?
>>>
>>> TEST(TestAdapterWriteNested, writeList) {
>>>  std::shared_ptr table_schema = schema({field("list", 
>>> list(int32()))});
>>>  int64_t num_rows = 1;
>>>  arrow::random::RandomArrayGenerator rand(kRandomSeed);
>>>  auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
>>>  std::shared_ptr array = rand.List(*value_array, num_rows + 1, 0.8);
>>>  std::shared_ptr chunked_array = 
>>> std::make_shared(array);
>>>  std::shared_ptr table = Table::Make(table_schema, {chunked_array});
>>>
>>>  std::shared_ptr buffer_output_stream =
>>>  io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
>>> 15).ValueOrDie();
>>>  std::unique_ptr writer =
>>>  adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>>>  ARROW_EXPECT_OK(writer->Write(*table));
>>>  ARROW_EXPECT_OK(writer->Close());
>>>  std::shared_ptr buffer = 
>>> buffer_output_stream->Finish().ValueOrDie();
>>>  std::shared_ptr in_stream(new 
>>> io::BufferReader(buffer));
>>>  std::unique_ptr reader;
>>>  ARROW_EXPECT_OK(
>>>  adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
>>> &reader));
>>>  std::shared_ptr actual_output_table;
>>>  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>>>  auto actual_array =
>>>  
>>> std::static_pointer_cast(actual_output_table->column(0)->chunk(0));
>>>  auto expected_array = 
>>> std::static_pointer_cast(table->column(0)->chunk(0));
>>>  AssertArraysEqual(*(actual_array->offsets()), 
>>> *(expected_array->offsets()));
>>>  AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
>>>  AssertBufferEqual(*(actual_array->null_bitmap()), 
>>> *(expected_array->null_bitmap()));
>>>  RecordProperty("array_equality", actual_array->Equals(*expected_array));
>>> }
>>>
>>>>> timestamp="2021-02-10T11:58:23" classname="TestAdapterWriteNested">
>>> 
>>> 
>>> 
>>>
>>>
>>>> On Feb 10, 2021, at 3:52 AM, Antoine Pitrou  wrote:
>>>>
>>>>
>>>> Hi Ying,
>>>>
>>>> Hmm, yes, this may be related to the null bitmaps, or the offsets.
>>>> Can you try to inspect or pretty-print the offsets arrays for the two
>>>> list arrays?
>>>>
>>>> Regards
>>>>
>>>> Antoine.
>>>>
>>>>
>>>> Le 10/02/2021 à 03:26, Ying Zhou a écrit :
>>>>> Hi,
>>>>>
>>>>> This is an extremely weird phenomenon. There

Re: [C++] Why are these two tables unequal?

2021-02-10 Thread Ying Zhou
Not really. So what’s really going on?!

TEST(TestAdapterWriteNested, writeList) {
  std::shared_ptr table_schema = schema({field("list", list(int32()))});
  int64_t num_rows = 1;
  arrow::random::RandomArrayGenerator rand(kRandomSeed);
  auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
  std::shared_ptr array = rand.List(*value_array, num_rows + 1, 0.8);
  std::shared_ptr chunked_array = 
std::make_shared(array);
  std::shared_ptr table = Table::Make(table_schema, {chunked_array});

  std::shared_ptr buffer_output_stream =
  io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
15).ValueOrDie();
  std::unique_ptr writer =
  adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
  ARROW_EXPECT_OK(writer->Write(*table));
  ARROW_EXPECT_OK(writer->Close());
  std::shared_ptr buffer = buffer_output_stream->Finish().ValueOrDie();
  std::shared_ptr in_stream(new io::BufferReader(buffer));
  std::unique_ptr reader;
  ARROW_EXPECT_OK(
  adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
&reader));
  std::shared_ptr actual_output_table;
  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
  auto actual_array =
  
std::static_pointer_cast(actual_output_table->column(0)->chunk(0));
  auto expected_array = 
std::static_pointer_cast(table->column(0)->chunk(0));
  AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
  AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
  AssertBufferEqual(*(actual_array->null_bitmap()), 
*(expected_array->null_bitmap()));
  ASSERT_TRUE(actual_array->type()->Equals(*(expected_array->type())));
  RecordProperty("output_type", actual_array->type()->ToString());
  RecordProperty("input_type", expected_array->type()->ToString());
  RecordProperty("array_equality", actual_array->Equals(*expected_array));
}








> On Feb 10, 2021, at 12:10 PM, Antoine Pitrou  wrote:
> 
> 
> Hmm, perhaps the types are unequal, then.  Can you print them out
> (including field metadata)?
> 
> 
> Le 10/02/2021 à 18:03, Ying Zhou a écrit :
>> Thanks! Now we have an even weirder phenomenon. Even the null bitmaps and 
>> offsets are equal. However the arrays aren’t! Does anyone know why?
>> 
>> TEST(TestAdapterWriteNested, writeList) {
>>  std::shared_ptr table_schema = schema({field("list", 
>> list(int32()))});
>>  int64_t num_rows = 1;
>>  arrow::random::RandomArrayGenerator rand(kRandomSeed);
>>  auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
>>  std::shared_ptr array = rand.List(*value_array, num_rows + 1, 0.8);
>>  std::shared_ptr chunked_array = 
>> std::make_shared(array);
>>  std::shared_ptr table = Table::Make(table_schema, {chunked_array});
>> 
>>  std::shared_ptr buffer_output_stream =
>>  io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
>> 15).ValueOrDie();
>>  std::unique_ptr writer =
>>  adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>>  ARROW_EXPECT_OK(writer->Write(*table));
>>  ARROW_EXPECT_OK(writer->Close());
>>  std::shared_ptr buffer = 
>> buffer_output_stream->Finish().ValueOrDie();
>>  std::shared_ptr in_stream(new 
>> io::BufferReader(buffer));
>>  std::unique_ptr reader;
>>  ARROW_EXPECT_OK(
>>  adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
>> &reader));
>>  std::shared_ptr actual_output_table;
>>  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>>  auto actual_array =
>>  
>> std::static_pointer_cast(actual_output_table->column(0)->chunk(0));
>>  auto expected_array = 
>> std::static_pointer_cast(table->column(0)->chunk(0));
>>  AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
>>  AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
>>  AssertBufferEqual(*(actual_array->null_bitmap()), 
>> *(expected_array->null_bitmap()));
>>  RecordProperty("array_equality", actual_array->Equals(*expected_array));
>> }
>> 
>>> timestamp="2021-02-10T11:58:23" classname="TestAdapterWriteNested">
>> 
>> 
>> 
>>
>> 
>>> On Feb 10, 2021, at 3:52 AM, Antoine Pitrou  wrote:
>>> 
>>> 
>>> Hi Ying,
>>> 
>>> Hmm, yes, this may be related to the null bitmaps, or the offsets.
>>> Can you try to inspect or pretty-print the offsets arrays for the two
>>> list arrays?
>>> 
>>> Regards
>>> 
>>> Antoine.
>>> 
>>> 
>>> Le 10/02/2021 à 03:26, Ying Zhou a écrit :
>>>> Hi,
>>>> 
>>>> This is an extremely weird phenomenon. There are two 2*1 tables that are 
>>>> supposedly different when I got a confusing error message like this:
>>>> 
>>>> [ RUN  ] TestAdapterWriteNested.writeList
>>>> /Users/karlkatzen/Documents/code/arrow-dev/arrow/cpp/src/arrow/testing/gtest_util.cc:459:
>>>>  Failure
>>>> Failed
>>>> Unequal at absolute position 2
>>>> Expected:
>>>> [
>>>>   [
>>>> null,
>>>> 1074834796,
>>>> null,
>>>> null
>>>>   ],
>>>>   null
>>>> ]
>>>> Actual:
>>>> [
>>>>   [
>>>> null,
>>>> 1074834796,
>>>> nul

Re: [C++] Why are these two tables unequal?

2021-02-10 Thread Antoine Pitrou


Hmm, perhaps the types are unequal, then.  Can you print them out
(including field metadata)?


Le 10/02/2021 à 18:03, Ying Zhou a écrit :
> Thanks! Now we have an even weirder phenomenon. Even the null bitmaps and 
> offsets are equal. However the arrays aren’t! Does anyone know why?
> 
> TEST(TestAdapterWriteNested, writeList) {
>   std::shared_ptr table_schema = schema({field("list", 
> list(int32()))});
>   int64_t num_rows = 1;
>   arrow::random::RandomArrayGenerator rand(kRandomSeed);
>   auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
>   std::shared_ptr array = rand.List(*value_array, num_rows + 1, 0.8);
>   std::shared_ptr chunked_array = 
> std::make_shared(array);
>   std::shared_ptr table = Table::Make(table_schema, {chunked_array});
> 
>   std::shared_ptr buffer_output_stream =
>   io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
> 15).ValueOrDie();
>   std::unique_ptr writer =
>   adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>   ARROW_EXPECT_OK(writer->Write(*table));
>   ARROW_EXPECT_OK(writer->Close());
>   std::shared_ptr buffer = 
> buffer_output_stream->Finish().ValueOrDie();
>   std::shared_ptr in_stream(new 
> io::BufferReader(buffer));
>   std::unique_ptr reader;
>   ARROW_EXPECT_OK(
>   adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
> &reader));
>   std::shared_ptr actual_output_table;
>   ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>   auto actual_array =
>   
> std::static_pointer_cast(actual_output_table->column(0)->chunk(0));
>   auto expected_array = 
> std::static_pointer_cast(table->column(0)->chunk(0));
>   AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
>   AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
>   AssertBufferEqual(*(actual_array->null_bitmap()), 
> *(expected_array->null_bitmap()));
>   RecordProperty("array_equality", actual_array->Equals(*expected_array));
> }
> 
>  timestamp="2021-02-10T11:58:23" classname="TestAdapterWriteNested">
> 
> 
> 
> 
> 
>> On Feb 10, 2021, at 3:52 AM, Antoine Pitrou  wrote:
>>
>>
>> Hi Ying,
>>
>> Hmm, yes, this may be related to the null bitmaps, or the offsets.
>> Can you try to inspect or pretty-print the offsets arrays for the two
>> list arrays?
>>
>> Regards
>>
>> Antoine.
>>
>>
>> Le 10/02/2021 à 03:26, Ying Zhou a écrit :
>>> Hi,
>>>
>>> This is an extremely weird phenomenon. There are two 2*1 tables that are 
>>> supposedly different when I got a confusing error message like this:
>>>
>>> [ RUN  ] TestAdapterWriteNested.writeList
>>> /Users/karlkatzen/Documents/code/arrow-dev/arrow/cpp/src/arrow/testing/gtest_util.cc:459:
>>>  Failure
>>> Failed
>>> Unequal at absolute position 2
>>> Expected:
>>>  [
>>>[
>>>  null,
>>>  1074834796,
>>>  null,
>>>  null
>>>],
>>>null
>>>  ]
>>> Actual:
>>>  [
>>>[
>>>  null,
>>>  1074834796,
>>>  null,
>>>  null
>>>],
>>>null
>>>  ]
>>> [  FAILED  ] TestAdapterWriteNested.writeList (2 ms)
>>>
>>> Here is the code that causes the issue:
>>>
>>> TEST(TestAdapterWriteNested, writeList) {
>>>  std::shared_ptr table_schema = schema({field("list", 
>>> list(int32()))});
>>>  int64_t num_rows = 2;
>>>  arrow::random::RandomArrayGenerator rand(kRandomSeed);
>>>  auto value_array = rand.ArrayOf(int32(), 2 * num_rows, 0.6);
>>>  std::shared_ptr array = rand.List(*value_array, num_rows + 1, 1);
>>>  std::shared_ptr chunked_array = 
>>> std::make_shared(array);
>>>  std::shared_ptr table = Table::Make(table_schema, {chunked_array});
>>>  AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize * 5);
>>> }
>>>
>>> Here AssertTableWriteReadEqual is a function I use to test that 
>>> from_orc(to_orc(table_in)) == expected_table_out. The function did not have 
>>> issues before.
>>>
>>> void AssertTableWriteReadEqual(const std::shared_ptr& input_table,
>>>   const std::shared_ptr& 
>>> expected_output_table,
>>>   const int64_t max_size = 
>>> kDefaultSmallMemStreamSize) {
>>>  std::shared_ptr buffer_output_stream =
>>>  io::BufferOutputStream::Create(max_size).ValueOrDie();
>>>  std::unique_ptr writer =
>>>  adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>>>  ARROW_EXPECT_OK(writer->Write(*input_table));
>>>  ARROW_EXPECT_OK(writer->Close());
>>>  std::shared_ptr buffer = 
>>> buffer_output_stream->Finish().ValueOrDie();
>>>  std::shared_ptr in_stream(new 
>>> io::BufferReader(buffer));
>>>  std::unique_ptr reader;
>>>  ARROW_EXPECT_OK(
>>>  adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
>>> &reader));
>>>  std::shared_ptr actual_output_table;
>>>  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>>>  AssertTablesEqual(*actual_output_table, *expected_output_table, false, 
>>> false);
>>> }
>>>
>>> I strongly suspect that this is related to the null bitm

Re: [C++] Why are these two tables unequal?

2021-02-10 Thread Ying Zhou
Thanks! Now we have an even weirder phenomenon. Even the null bitmaps and 
offsets are equal. However the arrays aren’t! Does anyone know why?

TEST(TestAdapterWriteNested, writeList) {
  std::shared_ptr table_schema = schema({field("list", list(int32()))});
  int64_t num_rows = 1;
  arrow::random::RandomArrayGenerator rand(kRandomSeed);
  auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
  std::shared_ptr array = rand.List(*value_array, num_rows + 1, 0.8);
  std::shared_ptr chunked_array = 
std::make_shared(array);
  std::shared_ptr table = Table::Make(table_schema, {chunked_array});

  std::shared_ptr buffer_output_stream =
  io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
15).ValueOrDie();
  std::unique_ptr writer =
  adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
  ARROW_EXPECT_OK(writer->Write(*table));
  ARROW_EXPECT_OK(writer->Close());
  std::shared_ptr buffer = buffer_output_stream->Finish().ValueOrDie();
  std::shared_ptr in_stream(new io::BufferReader(buffer));
  std::unique_ptr reader;
  ARROW_EXPECT_OK(
  adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
&reader));
  std::shared_ptr actual_output_table;
  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
  auto actual_array =
  
std::static_pointer_cast(actual_output_table->column(0)->chunk(0));
  auto expected_array = 
std::static_pointer_cast(table->column(0)->chunk(0));
  AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
  AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
  AssertBufferEqual(*(actual_array->null_bitmap()), 
*(expected_array->null_bitmap()));
  RecordProperty("array_equality", actual_array->Equals(*expected_array));
}







> On Feb 10, 2021, at 3:52 AM, Antoine Pitrou  wrote:
> 
> 
> Hi Ying,
> 
> Hmm, yes, this may be related to the null bitmaps, or the offsets.
> Can you try to inspect or pretty-print the offsets arrays for the two
> list arrays?
> 
> Regards
> 
> Antoine.
> 
> 
> Le 10/02/2021 à 03:26, Ying Zhou a écrit :
>> Hi,
>> 
>> This is an extremely weird phenomenon. There are two 2*1 tables that are 
>> supposedly different when I got a confusing error message like this:
>> 
>> [ RUN  ] TestAdapterWriteNested.writeList
>> /Users/karlkatzen/Documents/code/arrow-dev/arrow/cpp/src/arrow/testing/gtest_util.cc:459:
>>  Failure
>> Failed
>> Unequal at absolute position 2
>> Expected:
>>  [
>>[
>>  null,
>>  1074834796,
>>  null,
>>  null
>>],
>>null
>>  ]
>> Actual:
>>  [
>>[
>>  null,
>>  1074834796,
>>  null,
>>  null
>>],
>>null
>>  ]
>> [  FAILED  ] TestAdapterWriteNested.writeList (2 ms)
>> 
>> Here is the code that causes the issue:
>> 
>> TEST(TestAdapterWriteNested, writeList) {
>>  std::shared_ptr table_schema = schema({field("list", 
>> list(int32()))});
>>  int64_t num_rows = 2;
>>  arrow::random::RandomArrayGenerator rand(kRandomSeed);
>>  auto value_array = rand.ArrayOf(int32(), 2 * num_rows, 0.6);
>>  std::shared_ptr array = rand.List(*value_array, num_rows + 1, 1);
>>  std::shared_ptr chunked_array = 
>> std::make_shared(array);
>>  std::shared_ptr table = Table::Make(table_schema, {chunked_array});
>>  AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize * 5);
>> }
>> 
>> Here AssertTableWriteReadEqual is a function I use to test that 
>> from_orc(to_orc(table_in)) == expected_table_out. The function did not have 
>> issues before.
>> 
>> void AssertTableWriteReadEqual(const std::shared_ptr& input_table,
>>   const std::shared_ptr& 
>> expected_output_table,
>>   const int64_t max_size = 
>> kDefaultSmallMemStreamSize) {
>>  std::shared_ptr buffer_output_stream =
>>  io::BufferOutputStream::Create(max_size).ValueOrDie();
>>  std::unique_ptr writer =
>>  adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>>  ARROW_EXPECT_OK(writer->Write(*input_table));
>>  ARROW_EXPECT_OK(writer->Close());
>>  std::shared_ptr buffer = 
>> buffer_output_stream->Finish().ValueOrDie();
>>  std::shared_ptr in_stream(new 
>> io::BufferReader(buffer));
>>  std::unique_ptr reader;
>>  ARROW_EXPECT_OK(
>>  adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
>> &reader));
>>  std::shared_ptr actual_output_table;
>>  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>>  AssertTablesEqual(*actual_output_table, *expected_output_table, false, 
>> false);
>> }
>> 
>> I strongly suspect that this is related to the null bitmaps. What do you 
>> guys think?
>> 
>> Ying
>> 



Re: [C++] Why are these two tables unequal?

2021-02-10 Thread Antoine Pitrou


Hi Ying,

Hmm, yes, this may be related to the null bitmaps, or the offsets.
Can you try to inspect or pretty-print the offsets arrays for the two
list arrays?

Regards

Antoine.


Le 10/02/2021 à 03:26, Ying Zhou a écrit :
> Hi,
> 
> This is an extremely weird phenomenon. There are two 2*1 tables that are 
> supposedly different when I got a confusing error message like this:
> 
> [ RUN  ] TestAdapterWriteNested.writeList
> /Users/karlkatzen/Documents/code/arrow-dev/arrow/cpp/src/arrow/testing/gtest_util.cc:459:
>  Failure
> Failed
> Unequal at absolute position 2
> Expected:
>   [
> [
>   null,
>   1074834796,
>   null,
>   null
> ],
> null
>   ]
> Actual:
>   [
> [
>   null,
>   1074834796,
>   null,
>   null
> ],
> null
>   ]
> [  FAILED  ] TestAdapterWriteNested.writeList (2 ms)
> 
> Here is the code that causes the issue:
> 
> TEST(TestAdapterWriteNested, writeList) {
>   std::shared_ptr table_schema = schema({field("list", 
> list(int32()))});
>   int64_t num_rows = 2;
>   arrow::random::RandomArrayGenerator rand(kRandomSeed);
>   auto value_array = rand.ArrayOf(int32(), 2 * num_rows, 0.6);
>   std::shared_ptr array = rand.List(*value_array, num_rows + 1, 1);
>   std::shared_ptr chunked_array = 
> std::make_shared(array);
>   std::shared_ptr table = Table::Make(table_schema, {chunked_array});
>   AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize * 5);
> }
> 
> Here AssertTableWriteReadEqual is a function I use to test that 
> from_orc(to_orc(table_in)) == expected_table_out. The function did not have 
> issues before.
> 
> void AssertTableWriteReadEqual(const std::shared_ptr& input_table,
>const std::shared_ptr& 
> expected_output_table,
>const int64_t max_size = 
> kDefaultSmallMemStreamSize) {
>   std::shared_ptr buffer_output_stream =
>   io::BufferOutputStream::Create(max_size).ValueOrDie();
>   std::unique_ptr writer =
>   adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>   ARROW_EXPECT_OK(writer->Write(*input_table));
>   ARROW_EXPECT_OK(writer->Close());
>   std::shared_ptr buffer = 
> buffer_output_stream->Finish().ValueOrDie();
>   std::shared_ptr in_stream(new 
> io::BufferReader(buffer));
>   std::unique_ptr reader;
>   ARROW_EXPECT_OK(
>   adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
> &reader));
>   std::shared_ptr actual_output_table;
>   ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>   AssertTablesEqual(*actual_output_table, *expected_output_table, false, 
> false);
> }
> 
> I strongly suspect that this is related to the null bitmaps. What do you guys 
> think?
> 
> Ying
> 


[C++] Why are these two tables unequal?

2021-02-09 Thread Ying Zhou
Hi,

This is an extremely weird phenomenon. There are two 2*1 tables that are 
supposedly different when I got a confusing error message like this:

[ RUN  ] TestAdapterWriteNested.writeList
/Users/karlkatzen/Documents/code/arrow-dev/arrow/cpp/src/arrow/testing/gtest_util.cc:459:
 Failure
Failed
Unequal at absolute position 2
Expected:
  [
[
  null,
  1074834796,
  null,
  null
],
null
  ]
Actual:
  [
[
  null,
  1074834796,
  null,
  null
],
null
  ]
[  FAILED  ] TestAdapterWriteNested.writeList (2 ms)

Here is the code that causes the issue:

TEST(TestAdapterWriteNested, writeList) {
  std::shared_ptr table_schema = schema({field("list", list(int32()))});
  int64_t num_rows = 2;
  arrow::random::RandomArrayGenerator rand(kRandomSeed);
  auto value_array = rand.ArrayOf(int32(), 2 * num_rows, 0.6);
  std::shared_ptr array = rand.List(*value_array, num_rows + 1, 1);
  std::shared_ptr chunked_array = 
std::make_shared(array);
  std::shared_ptr table = Table::Make(table_schema, {chunked_array});
  AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize * 5);
}

Here AssertTableWriteReadEqual is a function I use to test that 
from_orc(to_orc(table_in)) == expected_table_out. The function did not have 
issues before.

void AssertTableWriteReadEqual(const std::shared_ptr& input_table,
   const std::shared_ptr& 
expected_output_table,
   const int64_t max_size = 
kDefaultSmallMemStreamSize) {
  std::shared_ptr buffer_output_stream =
  io::BufferOutputStream::Create(max_size).ValueOrDie();
  std::unique_ptr writer =
  adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
  ARROW_EXPECT_OK(writer->Write(*input_table));
  ARROW_EXPECT_OK(writer->Close());
  std::shared_ptr buffer = buffer_output_stream->Finish().ValueOrDie();
  std::shared_ptr in_stream(new io::BufferReader(buffer));
  std::unique_ptr reader;
  ARROW_EXPECT_OK(
  adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
&reader));
  std::shared_ptr actual_output_table;
  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
  AssertTablesEqual(*actual_output_table, *expected_output_table, false, false);
}

I strongly suspect that this is related to the null bitmaps. What do you guys 
think?

Ying