Repository: arrow Updated Branches: refs/heads/master 00a7d55cc -> 83a4405ea
ARROW-599: [C++] Lz4 compression codec support Author: Max Risukhin <risuhin....@gmail.com> Closes #813 from MaxRis/ARROW-599 and squashes the following commits: 434a238 [Max Risukhin] ARROW-599: [C++] Lz4 compression codec support Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/83a4405e Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/83a4405e Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/83a4405e Branch: refs/heads/master Commit: 83a4405ea0bd1696aeec7677edaef8671faed1ea Parents: 00a7d55 Author: Max Risukhin <risuhin....@gmail.com> Authored: Thu Jul 6 14:33:52 2017 +0200 Committer: Uwe L. Korn <uw...@xhochy.com> Committed: Thu Jul 6 14:33:52 2017 +0200 ---------------------------------------------------------------------- cpp/CMakeLists.txt | 3 ++- cpp/build-support/build-lz4-lib.sh | 16 +++++++++++++++ cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +- cpp/src/arrow/util/compression-test.cc | 4 ++++ cpp/src/arrow/util/compression.cc | 26 ++++++++++++++++++++++++ cpp/src/arrow/util/compression.h | 16 ++++++++++++++- python/manylinux1/scripts/build_lz4.sh | 1 + 7 files changed, 65 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 76d89ce..6d01fd9 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -533,7 +533,8 @@ set(ARROW_STATIC_LINK_LIBS brotli_common snappy zlib - zstd_static) + zstd_static + lz4_static) add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS}) http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/build-support/build-lz4-lib.sh ---------------------------------------------------------------------- diff --git a/cpp/build-support/build-lz4-lib.sh b/cpp/build-support/build-lz4-lib.sh new file mode 100755 index 0000000..62805ba --- /dev/null +++ b/cpp/build-support/build-lz4-lib.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +export CFLAGS="${CFLAGS} -O3 -fPIC" +make -j4 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/cmake_modules/ThirdpartyToolchain.cmake ---------------------------------------------------------------------- diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 2be7b5a..3eef2f7 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -623,7 +623,7 @@ if (NOT LZ4_FOUND) set(LZ4_BUILD_COMMAND BUILD_COMMAND msbuild.exe /m /p:Configuration=${CMAKE_BUILD_TYPE} /p:Platform=x64 /p:PlatformToolset=v140 /t:Build ${LZ4_BUILD_DIR}/visual/VS2010/lz4.sln) else() set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/lib/liblz4.a") - set(LZ4_BUILD_COMMAND BUILD_COMMAND make -j4) + set(LZ4_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-lz4-lib.sh) endif() ExternalProject_Add(lz4_ep http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/src/arrow/util/compression-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/compression-test.cc b/cpp/src/arrow/util/compression-test.cc index 3b19a6d..56dcc2a 100644 --- a/cpp/src/arrow/util/compression-test.cc +++ b/cpp/src/arrow/util/compression-test.cc @@ -90,4 +90,8 @@ TEST(TestCompressors, ZSTD) { CheckCodec<ZSTDCodec>(); } +TEST(TestCompressors, Lz4) { + CheckCodec<Lz4Codec>(); +} + } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/src/arrow/util/compression.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc index df1afa3..0f17e7c 100644 --- a/cpp/src/arrow/util/compression.cc +++ b/cpp/src/arrow/util/compression.cc @@ -29,6 +29,7 @@ #include <brotli/decode.h> #include <brotli/encode.h> +#include <lz4.h> #include <snappy.h> #include <zlib.h> #include <zstd.h> @@ -357,4 +358,29 @@ Status ZSTDCodec::Compress(int64_t input_len, const uint8_t* input, return Status::OK(); } +// ---------------------------------------------------------------------- +// Lz4 implementation + +Status Lz4Codec::Decompress( + int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) { + int64_t decompressed_size = LZ4_decompress_safe(reinterpret_cast<const char*>(input), + reinterpret_cast<char*>(output_buffer), static_cast<int>(input_len), + static_cast<int>(output_len)); + if (decompressed_size < 1) { return Status::IOError("Corrupt Lz4 compressed data."); } + return Status::OK(); +} + +int64_t Lz4Codec::MaxCompressedLen(int64_t input_len, const uint8_t* input) { + return LZ4_compressBound(static_cast<int>(input_len)); +} + +Status Lz4Codec::Compress(int64_t input_len, const uint8_t* input, + int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) { + *output_length = LZ4_compress_default(reinterpret_cast<const char*>(input), + reinterpret_cast<char*>(output_buffer), static_cast<int>(input_len), + static_cast<int>(output_buffer_len)); + if (*output_length < 1) { return Status::IOError("Lz4 compression failure."); } + return Status::OK(); +} + } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/src/arrow/util/compression.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/compression.h b/cpp/src/arrow/util/compression.h index 9e581d8..d382153 100644 --- a/cpp/src/arrow/util/compression.h +++ b/cpp/src/arrow/util/compression.h @@ -27,7 +27,7 @@ namespace arrow { struct Compression { - enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, ZSTD }; + enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, ZSTD, LZ4 }; }; class ARROW_EXPORT Codec { @@ -118,6 +118,20 @@ class ARROW_EXPORT ZSTDCodec : public Codec { const char* name() const override { return "zstd"; } }; +// Lz4 codec. +class ARROW_EXPORT Lz4Codec : public Codec { + public: + Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len, + uint8_t* output_buffer) override; + + Status Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len, + uint8_t* output_buffer, int64_t* output_length) override; + + int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override; + + const char* name() const override { return "lz4"; } +}; + } // namespace arrow #endif http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/python/manylinux1/scripts/build_lz4.sh ---------------------------------------------------------------------- diff --git a/python/manylinux1/scripts/build_lz4.sh b/python/manylinux1/scripts/build_lz4.sh index 5a25d3d..975a301 100755 --- a/python/manylinux1/scripts/build_lz4.sh +++ b/python/manylinux1/scripts/build_lz4.sh @@ -13,6 +13,7 @@ export LZ4_VERSION="1.7.5" export PREFIX="/usr" +export CFLAGS="${CFLAGS} -O3 -fPIC" export LDFLAGS="${LDFLAGS} -Wl,-rpath,${PREFIX}/lib -L${PREFIX}/lib" wget "https://github.com/lz4/lz4/archive/v${LZ4_VERSION}.tar.gz" -O lz4-${LZ4_VERSION}.tar.gz tar xf lz4-${LZ4_VERSION}.tar.gz