This is an automated email from the ASF dual-hosted git repository.
baumgold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
The following commit(s) were added to refs/heads/main by this push:
new f8f8d8e Pre-allocate buffer (#422)
f8f8d8e is described below
commit f8f8d8e2a44822a8c31ed354cd7d10e9d76ffcc3
Author: Joao Aparicio <[email protected]>
AuthorDate: Tue Apr 11 10:52:05 2023 -0500
Pre-allocate buffer (#422)
If we let transcode do its own allocation, it will allocate a small
vector, start filling it, resize the vector, fill it some more, resize
the vector, etc.
Instead, in this commit we pre-allocate a vector of the correct size and
pass it to transcode().
Inspired by https://github.com/apache/arrow-julia/pull/399
---
Project.toml | 2 ++
src/table.jl | 5 +++--
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/Project.toml b/Project.toml
index 1bd3284..bb4edd0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -33,6 +33,7 @@ PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
+TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
WorkerUtilities = "76eceee3-57b5-4d4a-8e66-0e911cebbf60"
@@ -48,6 +49,7 @@ PooledArrays = "0.5, 1.0"
SentinelArrays = "1"
Tables = "1.1"
TimeZones = "1"
+TranscodingStreams = "0.9.12"
WorkerUtilities = "1.1"
julia = "1.6"
diff --git a/src/table.jl b/src/table.jl
index b1695e9..db5184f 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -521,10 +521,11 @@ function uncompress(ptr::Ptr{UInt8}, buffer, compression)
len = unsafe_load(convert(Ptr{Int64}, ptr))
ptr += 8 # skip past uncompressed length as Int64
encodedbytes = unsafe_wrap(Array, ptr, buffer.length - 8)
+ decodedbytes = Vector{UInt8}(undef, len)
if compression.codec === Meta.CompressionTypes.LZ4_FRAME
- decodedbytes = transcode(LZ4FrameDecompressor, encodedbytes)
+ transcode(LZ4FrameDecompressor, encodedbytes, decodedbytes)
elseif compression.codec === Meta.CompressionTypes.ZSTD
- decodedbytes = transcode(ZstdDecompressor, encodedbytes)
+ transcode(ZstdDecompressor, encodedbytes, decodedbytes)
else
error("unsupported compression type when reading arrow buffers:
$(typeof(compression.codec))")
end