bkietz commented on code in PR #596:
URL: https://github.com/apache/arrow-nanoarrow/pull/596#discussion_r1769320763
##########
src/nanoarrow/common/inline_array.h:
##########
@@ -467,52 +468,136 @@ static inline ArrowErrorCode
ArrowArrayAppendDouble(struct ArrowArray* array,
return NANOARROW_OK;
}
+#define NANOARROW_BINARY_VIEW_FIXED_BUFFERS 2
+#define NANOARROW_BINARY_VIEW_INLINE_SIZE 12
+#define NANOARROW_BINARY_VIEW_PREVIEW_SIZE 4
+#define NANOARROW_BINARY_VIEW_BLOCK_SIZE (32 << 10) // 32KB
+
+// The Arrow C++ implementation uses anonymous structs as members
+// of the ArrowBinaryViewType. For Cython support in this library, we define
+// those structs outside of the ArrowBinaryViewType
+struct ArrowBinaryViewTypeInlinedData {
+ int32_t size;
+ uint8_t data[NANOARROW_BINARY_VIEW_INLINE_SIZE];
+};
+
+struct ArrowBinaryViewTypeRefData {
+ int32_t size;
+ uint8_t data[NANOARROW_BINARY_VIEW_PREVIEW_SIZE];
+ int32_t buffer_index;
+ int32_t offset;
+};
+
+union ArrowBinaryViewType {
+ struct ArrowBinaryViewTypeInlinedData inlined;
+ struct ArrowBinaryViewTypeRefData ref;
+ int64_t alignment_dummy;
+};
+
+static inline ArrowErrorCode ArrowArrayAddVariadicBuffers(struct ArrowArray*
array,
+ int32_t nbuffers,
+ int64_t*
new_buffer_start) {
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ const int32_t n_current_bufs = private_data->n_variadic_buffers;
+ const int32_t n_bufs_needed = n_current_bufs + nbuffers;
+
+ private_data->variadic_buffers = (struct ArrowBuffer*)ArrowRealloc(
+ private_data->variadic_buffers, sizeof(struct ArrowBuffer) *
n_bufs_needed);
+ if (private_data->variadic_buffers == NULL) {
+ return ENOMEM;
+ }
+ private_data->variadic_buffer_sizes = (int64_t*)ArrowRealloc(
+ private_data->variadic_buffer_sizes, sizeof(int64_t) * n_bufs_needed);
+ if (private_data->variadic_buffer_sizes == NULL) {
+ return ENOMEM;
+ }
+
+ for (int32_t i = 0; i < nbuffers; i++) {
+ ArrowBufferInit(&private_data->variadic_buffers[n_current_bufs + i]);
+ }
+ private_data->n_variadic_buffers = n_bufs_needed;
+ *new_buffer_start = n_current_bufs;
+
+ return NANOARROW_OK;
+}
+
static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
struct ArrowBufferView
value) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
- struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1);
- struct ArrowBuffer* data_buffer = ArrowArrayBuffer(
- array, 1 + (private_data->storage_type !=
NANOARROW_TYPE_FIXED_SIZE_BINARY));
- int32_t offset;
- int64_t large_offset;
- int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8;
+ if (private_data->storage_type == NANOARROW_TYPE_STRING_VIEW ||
+ private_data->storage_type == NANOARROW_TYPE_BINARY_VIEW) {
+ struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
+ union ArrowBinaryViewType bvt;
+ bvt.inlined.size = (int32_t)value.size_bytes;
- switch (private_data->storage_type) {
- case NANOARROW_TYPE_STRING:
- case NANOARROW_TYPE_BINARY:
- offset = ((int32_t*)offset_buffer->data)[array->length];
- if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) {
- return EOVERFLOW;
+ if (value.size_bytes <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {
+ memcpy(bvt.inlined.data, value.data.as_char, value.size_bytes);
+ } else {
+ const int32_t n_vbufs = private_data->n_variadic_buffers;
+ int64_t buf_index = n_vbufs - 1;
+ if (n_vbufs == 0 ||
+ private_data->variadic_buffers[n_vbufs - 1].size_bytes +
value.size_bytes >
+ NANOARROW_BINARY_VIEW_BLOCK_SIZE) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAddVariadicBuffers(array, 1,
&buf_index));
Review Comment:
Ah, I wrote this before observing that you aren't presizing data buffers:
https://github.com/apache/arrow-nanoarrow/pull/596#discussion_r1769271911
Since you aren't presizing, this isn't a bug.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]