Copilot commented on code in PR #828:
URL: https://github.com/apache/arrow-nanoarrow/pull/828#discussion_r2587258551
##########
r/tools/make-callentries.R:
##########
@@ -21,7 +21,7 @@
library(tidyverse)
-src_files <- list.files("src", "\\.(c|cpp)$", full.names = TRUE) %>%
+src_files <- list.files("src", "\\.(c|cc)$", full.names = TRUE) %>%
Review Comment:
The regex pattern `"\\.(c|cc)$"` will match files ending in `.c` or `.cc`,
but will not match `.cpp` files. If the codebase contains `.cpp` files, they
will be excluded. Consider using `"\\.(c|cc|cpp)$"` to include all C/C++ source
files, or verify that only `.c` and `.cc` files should be processed.
```suggestion
src_files <- list.files("src", "\\.(c|cc|cpp)$", full.names = TRUE) %>%
```
##########
r/src/nanoarrow_cpp.cc:
##########
@@ -201,3 +204,88 @@ extern "C" void
nanoarrow_preserve_and_release_on_other_thread(SEXP obj) {
std::thread worker([obj] { nanoarrow_release_sexp(obj); });
worker.join();
}
+
+// Collector utility for iterating over and collecting batches
+// Keeping this all in a single object reduces the amount of C++ deletion
+// we need to keep track of.
+struct ArrayVector {
+ nanoarrow::UniqueSchema schema;
+ nanoarrow::UniqueArray batch;
+ std::vector<nanoarrow::UniqueArray> vec;
+};
+
+// Use an external pointer to handle deleting the ArrayVector in
+// the event of a longjmp
+static void release_array_vector_xptr(SEXP array_vector_xptr) {
+ auto ptr =
reinterpret_cast<ArrayVector*>(R_ExternalPtrAddr(array_vector_xptr));
+ if (ptr != NULL) {
Review Comment:
Use `nullptr` instead of `NULL` in C++ code for null pointer comparisons.
This is the modern C++ standard and provides better type safety.
```suggestion
if (ptr != nullptr) {
```
##########
r/src/nanoarrow_cpp.cc:
##########
@@ -201,3 +204,88 @@ extern "C" void
nanoarrow_preserve_and_release_on_other_thread(SEXP obj) {
std::thread worker([obj] { nanoarrow_release_sexp(obj); });
worker.join();
}
+
+// Collector utility for iterating over and collecting batches
+// Keeping this all in a single object reduces the amount of C++ deletion
+// we need to keep track of.
+struct ArrayVector {
+ nanoarrow::UniqueSchema schema;
+ nanoarrow::UniqueArray batch;
+ std::vector<nanoarrow::UniqueArray> vec;
+};
+
+// Use an external pointer to handle deleting the ArrayVector in
+// the event of a longjmp
+static void release_array_vector_xptr(SEXP array_vector_xptr) {
+ auto ptr =
reinterpret_cast<ArrayVector*>(R_ExternalPtrAddr(array_vector_xptr));
+ if (ptr != NULL) {
+ delete ptr;
+ }
+}
+
+// Collects the entire array stream and collects the total number of rows and
+// total number of batches so that the R code on the end of this can decide
+// how best to proceed.
+extern "C" SEXP nanoarrow_c_collect_array_stream(SEXP array_stream_xptr, SEXP
n_sexp) {
+ struct ArrowArrayStream* array_stream =
+ nanoarrow_array_stream_from_xptr(array_stream_xptr);
+
+ double n_real = REAL(n_sexp)[0];
+ int n;
+ if (R_FINITE(n_real)) {
+ n = (int)n_real;
Review Comment:
Casting `double` to `int` can result in undefined behavior if `n_real`
exceeds the range of `int` (approximately ±2.1 billion). Add a range check
before casting to ensure `n_real` is within `INT_MIN` and `INT_MAX`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]