pitrou commented on code in PR #48975:
URL: https://github.com/apache/arrow/pull/48975#discussion_r2811726708
##########
cpp/src/arrow/ipc/message.h:
##########
@@ -469,6 +469,26 @@ Result<std::unique_ptr<Message>> ReadMessage(
const int64_t offset, const int32_t metadata_length, io::RandomAccessFile*
file,
const FieldsLoaderFunction& fields_loader = {});
+/// \brief Read encapsulated RPC message from position in file
Review Comment:
Ok, I think "RPC" is a typo, it should be "IPC", could you perhaps fix all
occurrences in this file?
##########
cpp/src/arrow/ipc/read_write_test.cc:
##########
@@ -3003,25 +3030,38 @@ void GetReadRecordBatchReadRanges(
auto read_ranges = tracked->get_read_ranges();
- // there are 3 read IOs before reading body:
- // 1) read magic and footer length IO
- // 2) read footer IO
- // 3) read record batch metadata IO
- EXPECT_EQ(read_ranges.size(), 3 + expected_body_read_lengths.size());
const int32_t magic_size =
static_cast<int>(strlen(ipc::internal::kArrowMagicBytes));
// read magic and footer length IO
auto file_end_size = magic_size + sizeof(int32_t);
auto footer_length_offset = buffer->size() - file_end_size;
auto footer_length = bit_util::FromLittleEndian(
util::SafeLoadAs<int32_t>(buffer->data() + footer_length_offset));
+
+ // read magic and footer length IO
EXPECT_EQ(read_ranges[0].length, file_end_size);
// read footer IO
EXPECT_EQ(read_ranges[1].length, footer_length);
- // read record batch metadata. The exact size is tricky to determine but it
doesn't
- // matter for this test and it should be smaller than the footer.
- EXPECT_LE(read_ranges[2].length, footer_length);
- for (uint32_t i = 0; i < expected_body_read_lengths.size(); i++) {
- EXPECT_EQ(read_ranges[3 + i].length, expected_body_read_lengths[i]);
+
+ // there are 3 read IOs before reading body:
+ // 1) read magic and footer length IO
+ // 2) read footer IO
+ // 3) read record batch metadata IO
Review Comment:
This comment only matches one of the two code paths. Perhaps fix it or use a
separate comment per code path?
##########
cpp/src/arrow/ipc/read_write_test.cc:
##########
@@ -3003,25 +3030,38 @@ void GetReadRecordBatchReadRanges(
auto read_ranges = tracked->get_read_ranges();
- // there are 3 read IOs before reading body:
- // 1) read magic and footer length IO
- // 2) read footer IO
- // 3) read record batch metadata IO
- EXPECT_EQ(read_ranges.size(), 3 + expected_body_read_lengths.size());
Review Comment:
Ok, can we keep a less strict assertion to check that we can at least access
the two first elements without segfaulting?
Something like:
```c++
// there are at least 2 read IOs before reading body:
// 1) read magic and footer length IO
// 2) read footer IO
EXPECT_GE(read_ranges.size(), 2);
```
##########
cpp/src/arrow/ipc/message.cc:
##########
@@ -421,6 +421,57 @@ Result<std::unique_ptr<Message>> ReadMessage(int64_t
offset, int32_t metadata_le
}
}
+Result<std::unique_ptr<Message>> ReadMessage(const int64_t offset,
Review Comment:
There's a lot of code duplication with the previous `ReadMessage` overload
above (the one that takes a `FieldsLoaderFunction`). Can we refactor using a
common helper so that most of the duplication goes away?
##########
cpp/src/arrow/ipc/read_write_test.cc:
##########
@@ -3003,25 +3030,38 @@ void GetReadRecordBatchReadRanges(
auto read_ranges = tracked->get_read_ranges();
- // there are 3 read IOs before reading body:
- // 1) read magic and footer length IO
- // 2) read footer IO
- // 3) read record batch metadata IO
- EXPECT_EQ(read_ranges.size(), 3 + expected_body_read_lengths.size());
const int32_t magic_size =
static_cast<int>(strlen(ipc::internal::kArrowMagicBytes));
// read magic and footer length IO
auto file_end_size = magic_size + sizeof(int32_t);
auto footer_length_offset = buffer->size() - file_end_size;
auto footer_length = bit_util::FromLittleEndian(
util::SafeLoadAs<int32_t>(buffer->data() + footer_length_offset));
+
+ // read magic and footer length IO
EXPECT_EQ(read_ranges[0].length, file_end_size);
// read footer IO
EXPECT_EQ(read_ranges[1].length, footer_length);
- // read record batch metadata. The exact size is tricky to determine but it
doesn't
- // matter for this test and it should be smaller than the footer.
- EXPECT_LE(read_ranges[2].length, footer_length);
- for (uint32_t i = 0; i < expected_body_read_lengths.size(); i++) {
- EXPECT_EQ(read_ranges[3 + i].length, expected_body_read_lengths[i]);
+
+ // there are 3 read IOs before reading body:
+ // 1) read magic and footer length IO
+ // 2) read footer IO
+ // 3) read record batch metadata IO
+ if (included_fields.empty()) {
+ EXPECT_EQ(read_ranges.size(), 3);
+
+ int64_t total_body = 0;
+ for (auto len : expected_body_read_lengths) total_body += len;
+
+ EXPECT_GT(read_ranges[2].length, total_body);
Review Comment:
Add a small comment explaining why? Also can take inspiration from the other
code path and add:
```
EXPECT_LE(read_ranges[2].length, total_body + footer_length);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]