emkornfield commented on code in PR #48345: URL: https://github.com/apache/arrow/pull/48345#discussion_r3472267014
########## cpp/src/arrow/util/alp/alp_codec.h: ########## @@ -0,0 +1,191 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// High-level codec interface for ALP compression + +#pragma once + +#include <cstddef> +#include <cstdint> +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/util/alp/alp.h" +#include "arrow/util/alp/alp_sampler.h" + +namespace arrow { +namespace util { +namespace alp { + +// ---------------------------------------------------------------------- +// AlpCodec + +/// \class AlpCodec +/// \brief High-level interface for ALP compression +/// +/// AlpCodec is an interface for Adaptive Lossless floating-Point Compression +/// (ALP) (https://dl.acm.org/doi/10.1145/3626717). For encoding, it samples +/// the data and applies decimal compression (Alp) to floating point values. +/// This class acts as a wrapper around the vector-based interfaces of +/// AlpSampler and Alp. 
+/// +/// \tparam T the floating point type (float or double) +template <typename T> +class AlpCodec { + public: + /// Type alias for the sampler result containing encoding presets + using AlpSamplerResult = typename AlpSampler<T>::AlpSamplerResult; + + /// \brief Create a sampling preset from input data + /// + /// This samples the input data and generates an encoding preset that can be + /// reused for encoding. This is useful when you want to pre-compute the preset + /// outside of the benchmark loop or encode multiple batches with the same preset. + /// + /// \param[in] input pointer to the input data to sample + /// \param[in] num_elements number of elements to sample + /// \return the sampling result containing the encoding preset + static AlpSamplerResult CreateSamplingPreset(const T* input, int64_t num_elements); + + /// \brief Encode floating point values using a pre-computed preset + /// + /// This encodes the data using a preset that was previously computed via + /// CreateSamplingPreset(). This avoids the sampling overhead during encoding. + /// + /// \param[in] input pointer to the input that is to be encoded + /// \param[in] num_elements number of elements to encode + /// \param[in] preset the pre-computed sampling result from CreateSamplingPreset() + /// \param[in] vector_size number of elements per vector (must be a power of 2, + /// at most 2^kMaxLogVectorSize) + /// \param[out] output pointer to the memory region we will encode into. + /// Must be at least GetMaxCompressedSize(num_elements) bytes. + /// \param[in,out] output_size the actual size of the encoded data in bytes, Review Comment: Why not return an error? I'm not sure what "bail out" means here. Do we expect the caller to then check for 0? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
