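Not really: the two types print identically (see the recorded properties below), yet the arrays still compare unequal. So what’s really going on?!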

// Writes a one-column list<int32> table to an in-memory ORC file, reads it
// back, and then compares the first chunk of the result against the input
// piece by piece (offsets, values, validity bitmap, type). The result of the
// whole-array Equals() call is only recorded as a test property, not asserted,
// so it can be inspected in the XML report.
TEST(TestAdapterWriteNested, writeList) {
  const int64_t num_rows = 10000;

  // Input: a single "list" column backed by one randomly generated chunk.
  std::shared_ptr<Schema> input_schema = schema({field("list", list(int32()))});
  arrow::random::RandomArrayGenerator generator(kRandomSeed);
  auto child_values = generator.ArrayOf(int32(), 5 * num_rows, 0.6);
  std::shared_ptr<Array> list_column =
      generator.List(*child_values, num_rows + 1, 0.8);
  std::shared_ptr<ChunkedArray> input_chunks =
      std::make_shared<ChunkedArray>(list_column);
  std::shared_ptr<Table> input_table = Table::Make(input_schema, {input_chunks});

  // Write the table into a growable in-memory sink.
  std::shared_ptr<io::BufferOutputStream> sink =
      io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 15).ValueOrDie();
  std::unique_ptr<adapters::orc::ORCFileWriter> orc_writer =
      adapters::orc::ORCFileWriter::Open(*sink).ValueOrDie();
  ARROW_EXPECT_OK(orc_writer->Write(*input_table));
  ARROW_EXPECT_OK(orc_writer->Close());

  // Read the freshly written bytes back into a table.
  std::shared_ptr<Buffer> orc_bytes = sink->Finish().ValueOrDie();
  std::shared_ptr<io::RandomAccessFile> source =
      std::make_shared<io::BufferReader>(orc_bytes);
  std::unique_ptr<adapters::orc::ORCFileReader> orc_reader;
  ARROW_EXPECT_OK(
      adapters::orc::ORCFileReader::Open(source, default_memory_pool(), &orc_reader));
  std::shared_ptr<Table> round_tripped;
  ARROW_EXPECT_OK(orc_reader->Read(&round_tripped));

  // Only the first chunk of the single column is examined on either side.
  auto actual_list =
      std::static_pointer_cast<ListArray>(round_tripped->column(0)->chunk(0));
  auto expected_list =
      std::static_pointer_cast<ListArray>(input_table->column(0)->chunk(0));

  // Component-wise checks, in the same order as the original experiment.
  AssertArraysEqual(*actual_list->offsets(), *expected_list->offsets());
  AssertArraysEqual(*actual_list->values(), *expected_list->values());
  AssertBufferEqual(*actual_list->null_bitmap(), *expected_list->null_bitmap());
  ASSERT_TRUE(actual_list->type()->Equals(*expected_list->type()));

  // Diagnostics surfaced through the test report rather than asserted.
  RecordProperty("output_type", actual_list->type()->ToString());
  RecordProperty("input_type", expected_list->type()->ToString());
  RecordProperty("array_equality", actual_list->Equals(*expected_list));
}

    <testcase name="writeList" status="run" result="completed" time="0.029" 
timestamp="2021-02-10T12:33:47" classname="TestAdapterWriteNested">
<properties>
<property name="output_type" value="list&lt;item: int32&gt;"/>
<property name="input_type" value="list&lt;item: int32&gt;"/>
<property name="array_equality" value="0"/>
</properties>
    </testcase>
> On Feb 10, 2021, at 12:10 PM, Antoine Pitrou <anto...@python.org> wrote:
> 
> 
> Hmm, perhaps the types are unequal, then.  Can you print them out
> (including field metadata)?
> 
> 
> Le 10/02/2021 à 18:03, Ying Zhou a écrit :
>> Thanks! Now we have an even weirder phenomenon. The offsets, the values and 
>> the null bitmaps all compare equal, yet the arrays don’t! Does anyone know why?
>> 
>> TEST(TestAdapterWriteNested, writeList) {
>>  std::shared_ptr<Schema> table_schema = schema({field("list", 
>> list(int32()))});
>>  int64_t num_rows = 10000;
>>  arrow::random::RandomArrayGenerator rand(kRandomSeed);
>>  auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
>>  std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 0.8);
>>  std::shared_ptr<ChunkedArray> chunked_array = 
>> std::make_shared<ChunkedArray>(array);
>>  std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array});
>> 
>>  std::shared_ptr<io::BufferOutputStream> buffer_output_stream =
>>      io::BufferOutputStream::Create(kDefaultSmallMemStreamSize * 
>> 15).ValueOrDie();
>>  std::unique_ptr<adapters::orc::ORCFileWriter> writer =
>>      adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>>  ARROW_EXPECT_OK(writer->Write(*table));
>>  ARROW_EXPECT_OK(writer->Close());
>>  std::shared_ptr<Buffer> buffer = 
>> buffer_output_stream->Finish().ValueOrDie();
>>  std::shared_ptr<io::RandomAccessFile> in_stream(new 
>> io::BufferReader(buffer));
>>  std::unique_ptr<adapters::orc::ORCFileReader> reader;
>>  ARROW_EXPECT_OK(
>>      adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
>> &reader));
>>  std::shared_ptr<Table> actual_output_table;
>>  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>>  auto actual_array =
>>      
>> std::static_pointer_cast<ListArray>(actual_output_table->column(0)->chunk(0));
>>  auto expected_array = 
>> std::static_pointer_cast<ListArray>(table->column(0)->chunk(0));
>>  AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
>>  AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
>>  AssertBufferEqual(*(actual_array->null_bitmap()), 
>> *(expected_array->null_bitmap()));
>>  RecordProperty("array_equality", actual_array->Equals(*expected_array));
>> }
>> 
>>    <testcase name="writeList" status="run" result="completed" time="0.028" 
>> timestamp="2021-02-10T11:58:23" classname="TestAdapterWriteNested">
>> <properties>
>> <property name="array_equality" value="0"/>
>> </properties>
>>    </testcase>
>> 
>>> On Feb 10, 2021, at 3:52 AM, Antoine Pitrou <anto...@python.org> wrote:
>>> 
>>> 
>>> Hi Ying,
>>> 
>>> Hmm, yes, this may be related to the null bitmaps, or the offsets.
>>> Can you try to inspect or pretty-print the offsets arrays for the two
>>> list arrays?
>>> 
>>> Regards
>>> 
>>> Antoine.
>>> 
>>> 
>>> Le 10/02/2021 à 03:26, Ying Zhou a écrit :
>>>> Hi,
>>>> 
>>>> This is an extremely weird phenomenon. Two 2*1 tables are reported as 
>>>> unequal, yet the confusing error message below prints identical contents:
>>>> 
>>>> [ RUN      ] TestAdapterWriteNested.writeList
>>>> /Users/karlkatzen/Documents/code/arrow-dev/arrow/cpp/src/arrow/testing/gtest_util.cc:459:
>>>>  Failure
>>>> Failed
>>>> Unequal at absolute position 2
>>>> Expected:
>>>> [
>>>>   [
>>>>     null,
>>>>     1074834796,
>>>>     null,
>>>>     null
>>>>   ],
>>>>   null
>>>> ]
>>>> Actual:
>>>> [
>>>>   [
>>>>     null,
>>>>     1074834796,
>>>>     null,
>>>>     null
>>>>   ],
>>>>   null
>>>> ]
>>>> [  FAILED  ] TestAdapterWriteNested.writeList (2 ms)
>>>> 
>>>> Here is the code that causes the issue:
>>>> 
>>>> TEST(TestAdapterWriteNested, writeList) {
>>>> std::shared_ptr<Schema> table_schema = schema({field("list", 
>>>> list(int32()))});
>>>> int64_t num_rows = 2;
>>>> arrow::random::RandomArrayGenerator rand(kRandomSeed);
>>>> auto value_array = rand.ArrayOf(int32(), 2 * num_rows, 0.6);
>>>> std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 1);
>>>> std::shared_ptr<ChunkedArray> chunked_array = 
>>>> std::make_shared<ChunkedArray>(array);
>>>> std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array});
>>>> AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize * 5);
>>>> }
>>>> 
>>>> Here AssertTableWriteReadEqual is a function I use to test that 
>>>> from_orc(to_orc(table_in)) == expected_table_out. The function did not 
>>>> have issues before.
>>>> 
>>>> void AssertTableWriteReadEqual(const std::shared_ptr<Table>& input_table,
>>>>                              const std::shared_ptr<Table>& 
>>>> expected_output_table,
>>>>                              const int64_t max_size = 
>>>> kDefaultSmallMemStreamSize) {
>>>> std::shared_ptr<io::BufferOutputStream> buffer_output_stream =
>>>>     io::BufferOutputStream::Create(max_size).ValueOrDie();
>>>> std::unique_ptr<adapters::orc::ORCFileWriter> writer =
>>>>     adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>>>> ARROW_EXPECT_OK(writer->Write(*input_table));
>>>> ARROW_EXPECT_OK(writer->Close());
>>>> std::shared_ptr<Buffer> buffer = 
>>>> buffer_output_stream->Finish().ValueOrDie();
>>>> std::shared_ptr<io::RandomAccessFile> in_stream(new 
>>>> io::BufferReader(buffer));
>>>> std::unique_ptr<adapters::orc::ORCFileReader> reader;
>>>> ARROW_EXPECT_OK(
>>>>     adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), 
>>>> &reader));
>>>> std::shared_ptr<Table> actual_output_table;
>>>> ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>>>> AssertTablesEqual(*actual_output_table, *expected_output_table, false, 
>>>> false);
>>>> }
>>>> 
>>>> I strongly suspect that this is related to the null bitmaps. What do you 
>>>> guys think?
>>>> 
>>>> Ying
>>>> 
>> 
>> 

Reply via email to