Copilot commented on code in PR #828:
URL: https://github.com/apache/arrow-nanoarrow/pull/828#discussion_r2587258551


##########
r/tools/make-callentries.R:
##########
@@ -21,7 +21,7 @@
 
 library(tidyverse)
 
-src_files <- list.files("src", "\\.(c|cpp)$", full.names = TRUE) %>%
+src_files <- list.files("src", "\\.(c|cc)$", full.names = TRUE) %>%

Review Comment:
   The regex pattern `"\\.(c|cc)$"` will match files ending in `.c` or `.cc`, 
but will not match `.cpp` files. If the codebase contains `.cpp` files, they 
will be excluded. Consider using `"\\.(c|cc|cpp)$"` to include all C/C++ source 
files, or verify that only `.c` and `.cc` files should be processed.
   ```suggestion
   src_files <- list.files("src", "\\.(c|cc|cpp)$", full.names = TRUE) %>%
   ```



##########
r/src/nanoarrow_cpp.cc:
##########
@@ -201,3 +204,88 @@ extern "C" void 
nanoarrow_preserve_and_release_on_other_thread(SEXP obj) {
   std::thread worker([obj] { nanoarrow_release_sexp(obj); });
   worker.join();
 }
+
+// Collector utility for iterating over and collecting batches
+// Keeping this all in a single object reduces the amount of C++ deletion
+// we need to keep track of.
+struct ArrayVector {
+  nanoarrow::UniqueSchema schema;
+  nanoarrow::UniqueArray batch;
+  std::vector<nanoarrow::UniqueArray> vec;
+};
+
+// Use an external pointer to handle deleting the ArrayVector in
+// the event of a longjmp
+static void release_array_vector_xptr(SEXP array_vector_xptr) {
+  auto ptr = 
reinterpret_cast<ArrayVector*>(R_ExternalPtrAddr(array_vector_xptr));
+  if (ptr != NULL) {

Review Comment:
   Use `nullptr` instead of `NULL` in C++ code for null pointer comparisons. 
This is the modern C++ standard and provides better type safety.
   ```suggestion
     if (ptr != nullptr) {
   ```



##########
r/src/nanoarrow_cpp.cc:
##########
@@ -201,3 +204,88 @@ extern "C" void 
nanoarrow_preserve_and_release_on_other_thread(SEXP obj) {
   std::thread worker([obj] { nanoarrow_release_sexp(obj); });
   worker.join();
 }
+
+// Collector utility for iterating over and collecting batches
+// Keeping this all in a single object reduces the amount of C++ deletion
+// we need to keep track of.
+struct ArrayVector {
+  nanoarrow::UniqueSchema schema;
+  nanoarrow::UniqueArray batch;
+  std::vector<nanoarrow::UniqueArray> vec;
+};
+
+// Use an external pointer to handle deleting the ArrayVector in
+// the event of a longjmp
+static void release_array_vector_xptr(SEXP array_vector_xptr) {
+  auto ptr = 
reinterpret_cast<ArrayVector*>(R_ExternalPtrAddr(array_vector_xptr));
+  if (ptr != NULL) {
+    delete ptr;
+  }
+}
+
+// Collects the entire array stream and collects the total number of rows and
+// total number of batches so that the R code on the end of this can decide
+// how best to proceed.
+extern "C" SEXP nanoarrow_c_collect_array_stream(SEXP array_stream_xptr, SEXP 
n_sexp) {
+  struct ArrowArrayStream* array_stream =
+      nanoarrow_array_stream_from_xptr(array_stream_xptr);
+
+  double n_real = REAL(n_sexp)[0];
+  int n;
+  if (R_FINITE(n_real)) {
+    n = (int)n_real;

Review Comment:
   Casting `double` to `int` can result in undefined behavior if `n_real` 
exceeds the range of `int` (approximately ±2.1 billion). Add a range check 
before casting to ensure `n_real` is within `INT_MIN` and `INT_MAX`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to