pitrou commented on code in PR #48975:
URL: https://github.com/apache/arrow/pull/48975#discussion_r2811726708


##########
cpp/src/arrow/ipc/message.h:
##########
@@ -469,6 +469,26 @@ Result<std::unique_ptr<Message>> ReadMessage(
     const int64_t offset, const int32_t metadata_length, io::RandomAccessFile* 
file,
     const FieldsLoaderFunction& fields_loader = {});
 
+/// \brief Read encapsulated RPC message from position in file

Review Comment:
   Ok, I think "RPC" is a typo, it should be "IPC", could you perhaps fix all 
occurrences in this file?



##########
cpp/src/arrow/ipc/read_write_test.cc:
##########
@@ -3003,25 +3030,38 @@ void GetReadRecordBatchReadRanges(
 
   auto read_ranges = tracked->get_read_ranges();
 
-  // there are 3 read IOs before reading body:
-  // 1) read magic and footer length IO
-  // 2) read footer IO
-  // 3) read record batch metadata IO
-  EXPECT_EQ(read_ranges.size(), 3 + expected_body_read_lengths.size());
   const int32_t magic_size = 
static_cast<int>(strlen(ipc::internal::kArrowMagicBytes));
   // read magic and footer length IO
   auto file_end_size = magic_size + sizeof(int32_t);
   auto footer_length_offset = buffer->size() - file_end_size;
   auto footer_length = bit_util::FromLittleEndian(
       util::SafeLoadAs<int32_t>(buffer->data() + footer_length_offset));
+
+  // read magic and footer length IO
   EXPECT_EQ(read_ranges[0].length, file_end_size);
   // read footer IO
   EXPECT_EQ(read_ranges[1].length, footer_length);
-  // read record batch metadata.  The exact size is tricky to determine but it 
doesn't
-  // matter for this test and it should be smaller than the footer.
-  EXPECT_LE(read_ranges[2].length, footer_length);
-  for (uint32_t i = 0; i < expected_body_read_lengths.size(); i++) {
-    EXPECT_EQ(read_ranges[3 + i].length, expected_body_read_lengths[i]);
+
+  // there are 3 read IOs before reading body:
+  // 1) read magic and footer length IO
+  // 2) read footer IO
+  // 3) read record batch metadata IO

Review Comment:
   This comment only matches one of the two code paths. Perhaps fix it or use a 
separate comment per code path?



##########
cpp/src/arrow/ipc/read_write_test.cc:
##########
@@ -3003,25 +3030,38 @@ void GetReadRecordBatchReadRanges(
 
   auto read_ranges = tracked->get_read_ranges();
 
-  // there are 3 read IOs before reading body:
-  // 1) read magic and footer length IO
-  // 2) read footer IO
-  // 3) read record batch metadata IO
-  EXPECT_EQ(read_ranges.size(), 3 + expected_body_read_lengths.size());

Review Comment:
   Ok, can we keep a less strict assertion to check that we can at least access 
the two first elements without segfaulting?
   Something like:
   ```c++
     // there are at least 2 read IOs before reading body:
     // 1) read magic and footer length IO
     // 2) read footer IO
     EXPECT_GE(read_ranges.size(), 2);
   ```



##########
cpp/src/arrow/ipc/message.cc:
##########
@@ -421,6 +421,57 @@ Result<std::unique_ptr<Message>> ReadMessage(int64_t 
offset, int32_t metadata_le
   }
 }
 
+Result<std::unique_ptr<Message>> ReadMessage(const int64_t offset,

Review Comment:
   There's a lot of code duplication with the previous `ReadMessage` overload 
above (the one that takes a `FieldsLoaderFunction`). Can we refactor using a 
common helper so that most of the duplication goes away?



##########
cpp/src/arrow/ipc/read_write_test.cc:
##########
@@ -3003,25 +3030,38 @@ void GetReadRecordBatchReadRanges(
 
   auto read_ranges = tracked->get_read_ranges();
 
-  // there are 3 read IOs before reading body:
-  // 1) read magic and footer length IO
-  // 2) read footer IO
-  // 3) read record batch metadata IO
-  EXPECT_EQ(read_ranges.size(), 3 + expected_body_read_lengths.size());
   const int32_t magic_size = 
static_cast<int>(strlen(ipc::internal::kArrowMagicBytes));
   // read magic and footer length IO
   auto file_end_size = magic_size + sizeof(int32_t);
   auto footer_length_offset = buffer->size() - file_end_size;
   auto footer_length = bit_util::FromLittleEndian(
       util::SafeLoadAs<int32_t>(buffer->data() + footer_length_offset));
+
+  // read magic and footer length IO
   EXPECT_EQ(read_ranges[0].length, file_end_size);
   // read footer IO
   EXPECT_EQ(read_ranges[1].length, footer_length);
-  // read record batch metadata.  The exact size is tricky to determine but it 
doesn't
-  // matter for this test and it should be smaller than the footer.
-  EXPECT_LE(read_ranges[2].length, footer_length);
-  for (uint32_t i = 0; i < expected_body_read_lengths.size(); i++) {
-    EXPECT_EQ(read_ranges[3 + i].length, expected_body_read_lengths[i]);
+
+  // there are 3 read IOs before reading body:
+  // 1) read magic and footer length IO
+  // 2) read footer IO
+  // 3) read record batch metadata IO
+  if (included_fields.empty()) {
+    EXPECT_EQ(read_ranges.size(), 3);
+
+    int64_t total_body = 0;
+    for (auto len : expected_body_read_lengths) total_body += len;
+
+    EXPECT_GT(read_ranges[2].length, total_body);

Review Comment:
   Add a small comment explaining why? Also can take inspiration from the other 
code path and add:
   ```
     EXPECT_LE(read_ranges[2].length, total_body + footer_length);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to