WillAyd commented on code in PR #596:
URL: https://github.com/apache/arrow-nanoarrow/pull/596#discussion_r1750297397


##########
src/nanoarrow/common/inline_array.h:
##########
@@ -467,52 +469,116 @@ static inline ArrowErrorCode 
ArrowArrayAppendDouble(struct ArrowArray* array,
   return NANOARROW_OK;
 }
 
+#define NANOARROW_BINARY_VIEW_INLINE_SIZE 12
+#define NANOARROW_BINARY_VIEW_PREVIEW_SIZE 4
+#define NANOARROW_BINARY_VIEW_BLOCK_SIZE 32 << 10  // 32KB
+
+// The Arrow C++ implementation uses anonymous structs as members
+// of the ArrowBinaryViewType. For Cython support in this library, we define
+// those structs outside of the ArrowBinaryViewType
+struct ArrowBinaryViewTypeInlinedData {
+  int32_t size;
+  uint8_t data[NANOARROW_BINARY_VIEW_INLINE_SIZE];
+};
+
+struct ArrowBinaryViewTypeRefData {
+  int32_t size;
+  uint8_t data[NANOARROW_BINARY_VIEW_PREVIEW_SIZE];
+  int32_t buffer_index;
+  int32_t offset;
+};
+
+union ArrowBinaryViewType {  // TODO: C++ impl uses alignas which comes in C11
+  struct ArrowBinaryViewTypeInlinedData inlined;
+  struct ArrowBinaryViewTypeRefData ref;
+};
+
 static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
                                                    struct ArrowBufferView 
value) {
   struct ArrowArrayPrivateData* private_data =
       (struct ArrowArrayPrivateData*)array->private_data;
 
-  struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1);
-  struct ArrowBuffer* data_buffer = ArrowArrayBuffer(
-      array, 1 + (private_data->storage_type != 
NANOARROW_TYPE_FIXED_SIZE_BINARY));
-  int32_t offset;
-  int64_t large_offset;
-  int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8;
+  if (private_data->storage_type == NANOARROW_TYPE_STRING_VIEW ||
+      private_data->storage_type == NANOARROW_TYPE_BINARY_VIEW) {
+    struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
+    union ArrowBinaryViewType bvt;
+    // TODO: bounds check
+    bvt.inlined.size = (int32_t)value.size_bytes;
 
-  switch (private_data->storage_type) {
-    case NANOARROW_TYPE_STRING:
-    case NANOARROW_TYPE_BINARY:
-      offset = ((int32_t*)offset_buffer->data)[array->length];
-      if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) {
-        return EOVERFLOW;
+    if (value.size_bytes <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {
+      memcpy(bvt.inlined.data, value.data.as_char, value.size_bytes);
+    } else {
+      int32_t n_vbufs = private_data->n_variadic_buffers;
+      if (n_vbufs == 0 ||
+          private_data->variadic_buffers[n_vbufs - 1].size_bytes + 
value.size_bytes >
+              NANOARROW_BINARY_VIEW_BLOCK_SIZE) {
+        // TODO: ArrowMalloc?
+        ++n_vbufs;
+        // TODO: these should be realloc for > 0 case
+        private_data->variadic_buffers =
+            (struct ArrowBuffer*)malloc(sizeof(struct ArrowBuffer) * 
(n_vbufs));

Review Comment:
   I assume the `inline_` headers intentionally do not import from anywhere 
else, but `ArrowMalloc` is defined in `nanoarrow.h` with an implementation in 
`utils.c`
   
   Would it be preferable to inline that definition/declaration into 
`inline_buffer.h`, or do we just want to stick with malloc here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to