[ https://issues.apache.org/jira/browse/ARROW-2261?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16388395#comment-16388395 ]
ASF GitHub Bot commented on ARROW-2261: --------------------------------------- xhochy closed pull request #1701: ARROW-2261: [GLib] Improve memory management for GArrowBuffer data URL: https://github.com/apache/arrow/pull/1701 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/c_glib/arrow-glib/buffer.cpp b/c_glib/arrow-glib/buffer.cpp index 4be8fed18..4dd9ce33a 100644 --- a/c_glib/arrow-glib/buffer.cpp +++ b/c_glib/arrow-glib/buffer.cpp @@ -47,11 +47,13 @@ G_BEGIN_DECLS typedef struct GArrowBufferPrivate_ { std::shared_ptr<arrow::Buffer> buffer; + GBytes *data; } GArrowBufferPrivate; enum { PROP_0, - PROP_BUFFER + PROP_BUFFER, + PROP_DATA }; G_DEFINE_TYPE_WITH_PRIVATE(GArrowBuffer, garrow_buffer, G_TYPE_OBJECT) @@ -59,6 +61,19 @@ G_DEFINE_TYPE_WITH_PRIVATE(GArrowBuffer, garrow_buffer, G_TYPE_OBJECT) #define GARROW_BUFFER_GET_PRIVATE(obj) \ (G_TYPE_INSTANCE_GET_PRIVATE((obj), GARROW_TYPE_BUFFER, GArrowBufferPrivate)) +static void +garrow_buffer_dispose(GObject *object) +{ + auto priv = GARROW_BUFFER_GET_PRIVATE(object); + + if (priv->data) { + g_bytes_unref(priv->data); + priv->data = nullptr; + } + + G_OBJECT_CLASS(garrow_buffer_parent_class)->dispose(object); +} + static void garrow_buffer_finalize(GObject *object) { @@ -71,9 +86,9 @@ garrow_buffer_finalize(GObject *object) static void garrow_buffer_set_property(GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) + guint prop_id, + const GValue *value, + GParamSpec *pspec) { auto priv = GARROW_BUFFER_GET_PRIVATE(object); @@ -82,6 +97,9 @@ garrow_buffer_set_property(GObject *object, priv->buffer = *static_cast<std::shared_ptr<arrow::Buffer> *>(g_value_get_pointer(value)); break; + case PROP_DATA: + priv->data = static_cast<GBytes *>(g_value_dup_boxed(value)); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; @@ -90,11 +108,16 @@ garrow_buffer_set_property(GObject *object, static void garrow_buffer_get_property(GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) + guint prop_id, + GValue *value, + GParamSpec *pspec) { + auto priv = GARROW_BUFFER_GET_PRIVATE(object); + switch (prop_id) { + case PROP_DATA: + g_value_set_boxed(value, priv->data); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; @@ -113,6 +136,7 @@ garrow_buffer_class_init(GArrowBufferClass *klass) auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_buffer_dispose; gobject_class->finalize = garrow_buffer_finalize; gobject_class->set_property = garrow_buffer_set_property; gobject_class->get_property = garrow_buffer_get_property; @@ -123,6 +147,14 @@ garrow_buffer_class_init(GArrowBufferClass *klass) static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); g_object_class_install_property(gobject_class, PROP_BUFFER, spec); + + spec = g_param_spec_boxed("data", + "Data", + "The raw data passed as GBytes *", + G_TYPE_BYTES, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATA, spec); } /** @@ -141,7 +173,25 @@ garrow_buffer_new(const guint8 *data, gint64 size) { auto arrow_buffer = std::make_shared<arrow::Buffer>(data, size); return garrow_buffer_new_raw(&arrow_buffer); +} +/** + * garrow_buffer_new_bytes: + * @data: Data for the buffer. + * + * Returns: A newly created #GArrowBuffer. + * + * Since: 0.9.0 + */ +GArrowBuffer * +garrow_buffer_new_bytes(GBytes *data) +{ + size_t data_size; + auto raw_data = g_bytes_get_data(data, &data_size); + auto arrow_buffer = + std::make_shared<arrow::Buffer>(static_cast<const uint8_t *>(raw_data), + data_size); + return garrow_buffer_new_raw_bytes(&arrow_buffer, data); } /** @@ -226,6 +276,12 @@ garrow_buffer_get_capacity(GArrowBuffer *buffer) GBytes * garrow_buffer_get_data(GArrowBuffer *buffer) { + auto priv = GARROW_BUFFER_GET_PRIVATE(buffer); + if (priv->data) { + g_bytes_ref(priv->data); + return priv->data; + } + auto arrow_buffer = garrow_buffer_get_raw(buffer); auto data = g_bytes_new_static(arrow_buffer->data(), arrow_buffer->size()); @@ -249,6 +305,13 @@ garrow_buffer_get_mutable_data(GArrowBuffer *buffer) if (!arrow_buffer->is_mutable()) { return NULL; } + + auto priv = GARROW_BUFFER_GET_PRIVATE(buffer); + if (priv->data) { + g_bytes_ref(priv->data); + return priv->data; + } + return g_bytes_new_static(arrow_buffer->mutable_data(), arrow_buffer->size()); } @@ -284,7 +347,8 @@ garrow_buffer_get_parent(GArrowBuffer *buffer) auto arrow_parent_buffer = arrow_buffer->parent(); if (arrow_parent_buffer) { - return garrow_buffer_new_raw(&arrow_parent_buffer); + auto priv = GARROW_BUFFER_GET_PRIVATE(buffer); + return garrow_buffer_new_raw_bytes(&arrow_parent_buffer, priv->data); } else { return NULL; } @@ -338,7 +402,8 @@ garrow_buffer_slice(GArrowBuffer *buffer, gint64 offset, gint64 size) auto arrow_buffer = std::make_shared<arrow::Buffer>(arrow_parent_buffer, offset, size); - return garrow_buffer_new_raw(&arrow_buffer); + auto priv = GARROW_BUFFER_GET_PRIVATE(buffer); + return garrow_buffer_new_raw_bytes(&arrow_buffer, priv->data); } @@ -374,6 +439,26 @@ garrow_mutable_buffer_new(guint8 *data, gint64 size) return garrow_mutable_buffer_new_raw(&arrow_buffer); } +/** + * garrow_mutable_buffer_new_bytes: + * @data: Data for the buffer. + * + * Returns: A newly created #GArrowMutableBuffer. + * + * Since: 0.9.0 + */ +GArrowMutableBuffer * +garrow_mutable_buffer_new_bytes(GBytes *data) +{ + size_t data_size; + auto raw_data = g_bytes_get_data(data, &data_size); + auto mutable_raw_data = const_cast<gpointer>(raw_data); + auto arrow_buffer = + std::make_shared<arrow::MutableBuffer>(static_cast<uint8_t *>(mutable_raw_data), + data_size); + return garrow_mutable_buffer_new_raw_bytes(&arrow_buffer, data); +} + /** * garrow_mutable_buffer_slice: * @buffer: A #GArrowMutableBuffer. @@ -397,7 +482,8 @@ garrow_mutable_buffer_slice(GArrowMutableBuffer *buffer, std::make_shared<arrow::MutableBuffer>(arrow_parent_buffer, offset, size); - return garrow_mutable_buffer_new_raw(&arrow_buffer); + auto priv = GARROW_BUFFER_GET_PRIVATE(buffer); + return garrow_mutable_buffer_new_raw_bytes(&arrow_buffer, priv->data); } @@ -494,9 +580,17 @@ G_END_DECLS GArrowBuffer * garrow_buffer_new_raw(std::shared_ptr<arrow::Buffer> *arrow_buffer) +{ + return garrow_buffer_new_raw_bytes(arrow_buffer, nullptr); +} + +GArrowBuffer * +garrow_buffer_new_raw_bytes(std::shared_ptr<arrow::Buffer> *arrow_buffer, + GBytes *data) { auto buffer = GARROW_BUFFER(g_object_new(GARROW_TYPE_BUFFER, "buffer", arrow_buffer, + "data", data, NULL)); return buffer; } @@ -513,9 +607,17 @@ garrow_buffer_get_raw(GArrowBuffer *buffer) GArrowMutableBuffer * garrow_mutable_buffer_new_raw(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer) +{ + return garrow_mutable_buffer_new_raw_bytes(arrow_buffer, nullptr); +} + +GArrowMutableBuffer * +garrow_mutable_buffer_new_raw_bytes(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer, + GBytes *data) { auto buffer = GARROW_MUTABLE_BUFFER(g_object_new(GARROW_TYPE_MUTABLE_BUFFER, "buffer", arrow_buffer, + "data", data, NULL)); return buffer; } diff --git a/c_glib/arrow-glib/buffer.h b/c_glib/arrow-glib/buffer.h index 300bb4f4e..50debcc05 100644 --- a/c_glib/arrow-glib/buffer.h +++ b/c_glib/arrow-glib/buffer.h @@ -36,6 +36,7 @@ struct _GArrowBufferClass GArrowBuffer *garrow_buffer_new (const guint8 *data, gint64 size); +GArrowBuffer *garrow_buffer_new_bytes (GBytes *data); gboolean garrow_buffer_equal (GArrowBuffer *buffer, GArrowBuffer *other_buffer); gboolean garrow_buffer_equal_n_bytes(GArrowBuffer *buffer, @@ -70,6 +71,7 @@ struct _GArrowMutableBufferClass GArrowMutableBuffer *garrow_mutable_buffer_new (guint8 *data, gint64 size); +GArrowMutableBuffer *garrow_mutable_buffer_new_bytes(GBytes *data); GArrowMutableBuffer *garrow_mutable_buffer_slice(GArrowMutableBuffer *buffer, gint64 offset, gint64 size); diff --git a/c_glib/arrow-glib/buffer.hpp b/c_glib/arrow-glib/buffer.hpp index d1664b11b..3dd37945a 100644 --- a/c_glib/arrow-glib/buffer.hpp +++ b/c_glib/arrow-glib/buffer.hpp @@ -24,7 +24,11 @@ #include <arrow-glib/buffer.h> GArrowBuffer *garrow_buffer_new_raw(std::shared_ptr<arrow::Buffer> *arrow_buffer); +GArrowBuffer *garrow_buffer_new_raw_bytes(std::shared_ptr<arrow::Buffer> *arrow_buffer, + GBytes *data); std::shared_ptr<arrow::Buffer> garrow_buffer_get_raw(GArrowBuffer *buffer); GArrowMutableBuffer *garrow_mutable_buffer_new_raw(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer); +GArrowMutableBuffer *garrow_mutable_buffer_new_raw_bytes(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer, + GBytes *data); GArrowPoolBuffer *garrow_pool_buffer_new_raw(std::shared_ptr<arrow::PoolBuffer> *arrow_buffer); diff --git a/c_glib/arrow-glib/input-stream.cpp b/c_glib/arrow-glib/input-stream.cpp index f602e5f7e..c643ad2a7 100644 --- a/c_glib/arrow-glib/input-stream.cpp +++ b/c_glib/arrow-glib/input-stream.cpp @@ -282,16 +282,79 @@ garrow_seekable_input_stream_read_tensor(GArrowSeekableInputStream *input_stream arrow_random_access_file.get(), &arrow_tensor); if (garrow_error_check(error, status, "[seekable-input-stream][read-tensor]")) { - return garrow_tensor_new_raw(&arrow_tensor, nullptr); + return garrow_tensor_new_raw(&arrow_tensor); } else { return NULL; } } -G_DEFINE_TYPE(GArrowBufferInputStream, \ - garrow_buffer_input_stream, \ - GARROW_TYPE_SEEKABLE_INPUT_STREAM); +typedef struct GArrowBufferInputStreamPrivate_ { + GArrowBuffer *buffer; +} GArrowBufferInputStreamPrivate; + +enum { + PROP_0_, + PROP_BUFFER +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowBufferInputStream, \ + garrow_buffer_input_stream, \ + GARROW_TYPE_SEEKABLE_INPUT_STREAM); + +#define GARROW_BUFFER_INPUT_STREAM_GET_PRIVATE(obj) \ + (G_TYPE_INSTANCE_GET_PRIVATE((obj), \ + GARROW_TYPE_BUFFER_INPUT_STREAM, \ + GArrowBufferInputStreamPrivate)) + +static void +garrow_buffer_input_stream_dispose(GObject *object) +{ + auto priv = GARROW_BUFFER_INPUT_STREAM_GET_PRIVATE(object); + + if (priv->buffer) { + g_object_unref(priv->buffer); + priv->buffer = nullptr; + } + + G_OBJECT_CLASS(garrow_buffer_input_stream_parent_class)->dispose(object); +} + +static void +garrow_buffer_input_stream_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_BUFFER_INPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_BUFFER: + priv->buffer = GARROW_BUFFER(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_buffer_input_stream_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_BUFFER_INPUT_STREAM_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_BUFFER: + g_value_set_object(value, priv->buffer); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} static void garrow_buffer_input_stream_init(GArrowBufferInputStream *object) @@ -301,6 +364,21 @@ garrow_buffer_input_stream_init(GArrowBufferInputStream *object) static void garrow_buffer_input_stream_class_init(GArrowBufferInputStreamClass *klass) { + GParamSpec *spec; + + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_buffer_input_stream_dispose; + gobject_class->set_property = garrow_buffer_input_stream_set_property; + gobject_class->get_property = garrow_buffer_input_stream_get_property; + + spec = g_param_spec_object("buffer", + "Buffer", + "The data", + GARROW_TYPE_BUFFER, + static_cast<GParamFlags>(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_BUFFER, spec); } /** @@ -315,18 +393,24 @@ garrow_buffer_input_stream_new(GArrowBuffer *buffer) auto arrow_buffer = garrow_buffer_get_raw(buffer); auto arrow_buffer_reader = std::make_shared<arrow::io::BufferReader>(arrow_buffer); - return garrow_buffer_input_stream_new_raw(&arrow_buffer_reader); + return garrow_buffer_input_stream_new_raw_buffer(&arrow_buffer_reader, buffer); } /** * garrow_buffer_input_stream_get_buffer: * @input_stream: A #GArrowBufferInputStream. * - * Returns: (transfer full): The data of the array as #GArrowBuffer. + * Returns: (transfer full): The data of the stream as #GArrowBuffer. */ GArrowBuffer * garrow_buffer_input_stream_get_buffer(GArrowBufferInputStream *input_stream) { + auto priv = GARROW_BUFFER_INPUT_STREAM_GET_PRIVATE(input_stream); + if (priv->buffer) { + g_object_ref(priv->buffer); + return priv->buffer; + } + auto arrow_buffer_reader = garrow_buffer_input_stream_get_raw(input_stream); auto arrow_buffer = arrow_buffer_reader->buffer(); return garrow_buffer_new_raw(&arrow_buffer); @@ -626,10 +710,18 @@ garrow_seekable_input_stream_get_raw(GArrowSeekableInputStream *seekable_input_s GArrowBufferInputStream * garrow_buffer_input_stream_new_raw(std::shared_ptr<arrow::io::BufferReader> *arrow_buffer_reader) +{ + return garrow_buffer_input_stream_new_raw_buffer(arrow_buffer_reader, nullptr); +} + +GArrowBufferInputStream * +garrow_buffer_input_stream_new_raw_buffer(std::shared_ptr<arrow::io::BufferReader> *arrow_buffer_reader, + GArrowBuffer *buffer) { auto buffer_input_stream = GARROW_BUFFER_INPUT_STREAM(g_object_new(GARROW_TYPE_BUFFER_INPUT_STREAM, "input-stream", arrow_buffer_reader, + "buffer", buffer, NULL)); return buffer_input_stream; } diff --git a/c_glib/arrow-glib/input-stream.hpp b/c_glib/arrow-glib/input-stream.hpp index 17d2bd924..1d835e80b 100644 --- a/c_glib/arrow-glib/input-stream.hpp +++ b/c_glib/arrow-glib/input-stream.hpp @@ -31,6 +31,8 @@ std::shared_ptr<arrow::io::InputStream> garrow_input_stream_get_raw(GArrowInputS std::shared_ptr<arrow::io::RandomAccessFile> garrow_seekable_input_stream_get_raw(GArrowSeekableInputStream *input_stream); GArrowBufferInputStream *garrow_buffer_input_stream_new_raw(std::shared_ptr<arrow::io::BufferReader> *arrow_buffer_reader); +GArrowBufferInputStream *garrow_buffer_input_stream_new_raw_buffer(std::shared_ptr<arrow::io::BufferReader> *arrow_buffer_reader, + GArrowBuffer *buffer); std::shared_ptr<arrow::io::BufferReader> garrow_buffer_input_stream_get_raw(GArrowBufferInputStream *input_stream); GArrowMemoryMappedInputStream *garrow_memory_mapped_input_stream_new_raw(std::shared_ptr<arrow::io::MemoryMappedFile> *arrow_memory_mapped_file); diff --git a/c_glib/arrow-glib/tensor.cpp b/c_glib/arrow-glib/tensor.cpp index 359831f67..e7d3c38f7 100644 --- a/c_glib/arrow-glib/tensor.cpp +++ b/c_glib/arrow-glib/tensor.cpp @@ -198,7 +198,7 @@ garrow_tensor_new(GArrowDataType *data_type, arrow_shape, arrow_strides, arrow_dimension_names); - auto tensor = garrow_tensor_new_raw(&arrow_tensor, data); + auto tensor = garrow_tensor_new_raw_buffer(&arrow_tensor, data); return tensor; } @@ -436,8 +436,14 @@ garrow_tensor_is_column_major(GArrowTensor *tensor) G_END_DECLS GArrowTensor * -garrow_tensor_new_raw(std::shared_ptr<arrow::Tensor> *arrow_tensor, - GArrowBuffer *buffer) +garrow_tensor_new_raw(std::shared_ptr<arrow::Tensor> *arrow_tensor) +{ + return garrow_tensor_new_raw_buffer(arrow_tensor, nullptr); +} + +GArrowTensor * +garrow_tensor_new_raw_buffer(std::shared_ptr<arrow::Tensor> *arrow_tensor, + GArrowBuffer *buffer) { auto tensor = GARROW_TENSOR(g_object_new(GARROW_TYPE_TENSOR, "tensor", arrow_tensor, diff --git a/c_glib/arrow-glib/tensor.hpp b/c_glib/arrow-glib/tensor.hpp index 8e54e492a..c90dc6d4d 100644 --- a/c_glib/arrow-glib/tensor.hpp +++ b/c_glib/arrow-glib/tensor.hpp @@ -23,6 +23,7 @@ #include <arrow-glib/tensor.h> -GArrowTensor *garrow_tensor_new_raw(std::shared_ptr<arrow::Tensor> *arrow_tensor, - GArrowBuffer *buffer); +GArrowTensor *garrow_tensor_new_raw(std::shared_ptr<arrow::Tensor> *arrow_tensor); +GArrowTensor *garrow_tensor_new_raw_buffer(std::shared_ptr<arrow::Tensor> *arrow_tensor, + GArrowBuffer *buffer); std::shared_ptr<arrow::Tensor> garrow_tensor_get_raw(GArrowTensor *tensor); diff --git a/c_glib/test/test-buffer.rb b/c_glib/test/test-buffer.rb index 3b02df7ba..3b9a031ab 100644 --- a/c_glib/test/test-buffer.rb +++ b/c_glib/test/test-buffer.rb @@ -23,6 +23,16 @@ def setup @buffer = Arrow::Buffer.new(@data) end + def test_new_bytes + bytes_data = GLib::Bytes.new(@data) + buffer = Arrow::Buffer.new(bytes_data) + if GLib.check_binding_version?(3, 2, 2) + assert_equal(bytes_data.pointer, buffer.data.pointer) + else + assert_equal(@data, buffer.data.to_s) + end + end + def test_equal assert_equal(@buffer, Arrow::Buffer.new(@data.dup)) diff --git a/c_glib/test/test-mutable-buffer.rb b/c_glib/test/test-mutable-buffer.rb index df62dcf1e..f370e6026 100644 --- a/c_glib/test/test-mutable-buffer.rb +++ b/c_glib/test/test-mutable-buffer.rb @@ -21,6 +21,16 @@ def setup @buffer = Arrow::MutableBuffer.new(@data) end + def test_new_bytes + bytes_data = GLib::Bytes.new(@data) + buffer = Arrow::MutableBuffer.new(bytes_data) + if GLib.check_binding_version?(3, 2, 2) + assert_equal(bytes_data.pointer, buffer.mutable_data.pointer) + else + assert_equal(@data, buffer.mutable_data.to_s) + end + end + def test_mutable? assert do @buffer.mutable? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > [GLib] Can't share the same memory in GArrowBuffer safely > --------------------------------------------------------- > > Key: ARROW-2261 > URL: https://issues.apache.org/jira/browse/ARROW-2261 > Project: Apache Arrow > Issue Type: Improvement > Components: GLib > Affects Versions: 0.8.0 > Reporter: Kouhei Sutou > Assignee: Kouhei Sutou > Priority: Major > Labels: pull-request-available > Fix For: 0.9.0 > > -- This message was sent by Atlassian JIRA (v7.6.3#76005)