This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch array_tuple_sketch in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit c39539cd95214b6362f32e177d92e412b481ac1e Author: AlexanderSaydakov <[email protected]> AuthorDate: Mon Oct 23 14:11:46 2023 -0700 more generic implementation of array_of_doubles_sketch --- tuple/include/array_of_doubles_sketch.hpp | 178 ++------------- tuple/include/array_of_doubles_union.hpp | 81 ------- tuple/include/array_of_doubles_union_impl.hpp | 43 ---- ...doubles_a_not_b.hpp => array_tuple_a_not_b.hpp} | 23 +- ...not_b_impl.hpp => array_tuple_a_not_b_impl.hpp} | 8 +- ...tersection.hpp => array_tuple_intersection.hpp} | 21 +- ..._impl.hpp => array_tuple_intersection_impl.hpp} | 10 +- tuple/include/array_tuple_sketch.hpp | 238 +++++++++++++++++++++ ...sketch_impl.hpp => array_tuple_sketch_impl.hpp} | 81 ++++--- tuple/include/array_tuple_union.hpp | 81 +++++++ tuple/include/array_tuple_union_impl.hpp | 43 ++++ .../test/aod_sketch_deserialize_from_java_test.cpp | 3 +- tuple/test/aod_sketch_serialize_for_java.cpp | 3 +- tuple/test/array_of_doubles_sketch_test.cpp | 8 +- 14 files changed, 456 insertions(+), 365 deletions(-) diff --git a/tuple/include/array_of_doubles_sketch.hpp b/tuple/include/array_of_doubles_sketch.hpp index 926b096..9371ebc 100644 --- a/tuple/include/array_of_doubles_sketch.hpp +++ b/tuple/include/array_of_doubles_sketch.hpp @@ -20,175 +20,35 @@ #ifndef ARRAY_OF_DOUBLES_SKETCH_HPP_ #define ARRAY_OF_DOUBLES_SKETCH_HPP_ -#include <vector> -#include <memory> - -#include "serde.hpp" -#include "tuple_sketch.hpp" +#include "array_tuple_sketch.hpp" +#include "array_tuple_union.hpp" +#include "array_tuple_intersection.hpp" +#include "array_tuple_a_not_b.hpp" namespace datasketches { -// This sketch is equivalent of ArrayOfDoublesSketch in Java - -// This simple array of double is faster than std::vector and should be sufficient for this application -template<typename Allocator = std::allocator<double>> -class aod { -public: - explicit aod(uint8_t size, const Allocator& allocator = Allocator()): - allocator_(allocator), size_(size), array_(allocator_.allocate(size_)) { - std::fill(array_, array_ + size_, 0); - } - aod(const aod& other): - allocator_(other.allocator_), - size_(other.size_), - array_(allocator_.allocate(size_)) - { - std::copy(other.array_, other.array_ + size_, array_); - } - aod(aod&& other) noexcept: - allocator_(std::move(other.allocator_)), - size_(other.size_), - array_(other.array_) - { - other.array_ = nullptr; - } - ~aod() { - if (array_ != nullptr) allocator_.deallocate(array_, size_); - } - aod& operator=(const aod& other) { - aod copy(other); - std::swap(allocator_, copy.allocator_); - std::swap(size_, copy.size_); - std::swap(array_, copy.array_); - return *this; - } - aod& operator=(aod&& other) { - std::swap(allocator_, other.allocator_); - std::swap(size_, other.size_); - std::swap(array_, other.array_); - return *this; - } - double& operator[](size_t index) { return array_[index]; } - double operator[](size_t index) const { return array_[index]; } - uint8_t size() const { return size_; } - double* data() { return array_; } - const double* data() const { return array_; } - bool operator==(const aod& other) const { - for (uint8_t i = 0; i < size_; ++i) if (array_[i] != other.array_[i]) return false; - return true; - } -private: - Allocator allocator_; - uint8_t size_; - double* array_; -}; - -template<typename A = std::allocator<double>> -class array_of_doubles_update_policy { -public: - array_of_doubles_update_policy(uint8_t num_values = 1, const A& allocator = A()): - allocator_(allocator), num_values_(num_values) {} - aod<A> create() const { - return aod<A>(num_values_, allocator_); - } - template<typename InputVector> // to allow any type with indexed access (such as double*) - void update(aod<A>& summary, const InputVector& update) const { - for (uint8_t i = 0; i < num_values_; ++i) summary[i] += update[i]; - } - uint8_t get_num_values() const { - return num_values_; - } - -private: - A allocator_; - uint8_t num_values_; -}; - -// forward declaration -template<typename A> class compact_array_of_doubles_sketch_alloc; - -template<typename A> using AllocAOD = typename std::allocator_traits<A>::template rebind_alloc<aod<A>>; - -/** - * Update Array of doubles sketch. - * There is no constructor. Use builder instead. - */ -template<typename A = std::allocator<double>> -class update_array_of_doubles_sketch_alloc: public update_tuple_sketch<aod<A>, aod<A>, array_of_doubles_update_policy<A>, AllocAOD<A>> { -public: - using Base = update_tuple_sketch<aod<A>, aod<A>, array_of_doubles_update_policy<A>, AllocAOD<A>>; - using resize_factor = typename Base::resize_factor; - - class builder; +/// convenience alias with default allocator, default policy for update_array_of_doubles_sketch +using default_array_of_doubles_update_policy = default_array_tuple_update_policy<array<double>>; - compact_array_of_doubles_sketch_alloc<A> compact(bool ordered = true) const; +/// convenience alias with default allocator, equivalent to ArrayOfDoublesUpdatableSketch in Java +using update_array_of_doubles_sketch = update_array_tuple_sketch<array<double>>; - /// @return number of double values in array - uint8_t get_num_values() const; +/// convenience alias with default allocator, equivalent to ArrayOfDoublesCompactSketch in Java +using compact_array_of_doubles_sketch = compact_array_tuple_sketch<array<double>>; -private: - // for builder - update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, - uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator); -}; +/// convenience alias, default policy for array_of_doubles_union +using default_array_of_doubles_union_policy = default_array_tuple_union_policy<array<double>>; -/// alias with the default allocator for convenience -using update_array_of_doubles_sketch = update_array_of_doubles_sketch_alloc<>; +/// convenience alias with default allocator, equivalent to ArrayOfDoublesUnion in Java +using array_of_doubles_union = array_tuple_union<array<double>>; -/// Update Array of doubles sketch builder -template<typename A> -class update_array_of_doubles_sketch_alloc<A>::builder: public tuple_base_builder<builder, array_of_doubles_update_policy<A>, A> { -public: - builder(const array_of_doubles_update_policy<A>& policy = array_of_doubles_update_policy<A>(), const A& allocator = A()); - update_array_of_doubles_sketch_alloc<A> build() const; -}; +/// convenience alias with default allocator, equivalent to ArrayOfDoublesIntersection in Java +/// no default policy since it is not clear in general +template<typename Policy> using array_of_doubles_intersection = array_tuple_intersection<array<double>, Policy>; -/// Compact Array of doubles sketch -template<typename A = std::allocator<double>> -class compact_array_of_doubles_sketch_alloc: public compact_tuple_sketch<aod<A>, AllocAOD<A>> { -public: - using Base = compact_tuple_sketch<aod<A>, AllocAOD<A>>; - using Entry = typename Base::Entry; - using AllocEntry = typename Base::AllocEntry; - using AllocU64 = typename Base::AllocU64; - using vector_bytes = typename Base::vector_bytes; - - static const uint8_t SERIAL_VERSION = 1; - static const uint8_t SKETCH_FAMILY = 9; - static const uint8_t SKETCH_TYPE = 3; - enum flags { UNUSED1, UNUSED2, IS_EMPTY, HAS_ENTRIES, IS_ORDERED }; - - /** - * Copy constructor. - * Constructs a compact sketch from another AOD sketch (update or compact) - * @param other sketch to be constructed from - * @param ordered if true make the resulting sketch ordered - */ - template<typename Sketch> - compact_array_of_doubles_sketch_alloc(const Sketch& other, bool ordered = true); - - /// @return number of double values in array - uint8_t get_num_values() const; - - void serialize(std::ostream& os) const; - vector_bytes serialize(unsigned header_size_bytes = 0) const; - - static compact_array_of_doubles_sketch_alloc deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const A& allocator = A()); - static compact_array_of_doubles_sketch_alloc deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, - const A& allocator = A()); - - // for internal use - compact_array_of_doubles_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values); - compact_array_of_doubles_sketch_alloc(uint8_t num_values, Base&& base); -private: - uint8_t num_values_; -}; - -/// alias with the default allocator for convenience -using compact_array_of_doubles_sketch = compact_array_of_doubles_sketch_alloc<>; +/// convenience alias with default allocator, equivalent to ArrayOfDoublesAnotB in Java +using array_of_doubles_a_not_b = array_tuple_a_not_b<array<double>>; } /* namespace datasketches */ -#include "array_of_doubles_sketch_impl.hpp" - #endif diff --git a/tuple/include/array_of_doubles_union.hpp b/tuple/include/array_of_doubles_union.hpp deleted file mode 100644 index 06ca0b6..0000000 --- a/tuple/include/array_of_doubles_union.hpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef ARRAY_OF_DOUBLES_UNION_HPP_ -#define ARRAY_OF_DOUBLES_UNION_HPP_ - -#include <vector> -#include <memory> - -#include "array_of_doubles_sketch.hpp" -#include "tuple_union.hpp" - -namespace datasketches { - -template<typename A = std::allocator<double>> -struct array_of_doubles_union_policy_alloc { - array_of_doubles_union_policy_alloc(uint8_t num_values = 1): num_values_(num_values) {} - - void operator()(aod<A>& summary, const aod<A>& other) const { - for (size_t i = 0; i < summary.size(); ++i) { - summary[i] += other[i]; - } - } - - uint8_t get_num_values() const { - return num_values_; - } -private: - uint8_t num_values_; -}; - -using array_of_doubles_union_policy = array_of_doubles_union_policy_alloc<>; - -template<typename Allocator = std::allocator<double>> -class array_of_doubles_union_alloc: public tuple_union<aod<Allocator>, array_of_doubles_union_policy_alloc<Allocator>, AllocAOD<Allocator>> { -public: - using Policy = array_of_doubles_union_policy_alloc<Allocator>; - using Base = tuple_union<aod<Allocator>, Policy, AllocAOD<Allocator>>; - using CompactSketch = compact_array_of_doubles_sketch_alloc<Allocator>; - using resize_factor = theta_constants::resize_factor; - - class builder; - - CompactSketch get_result(bool ordered = true) const; - -private: - // for builder - array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator); -}; - -template<typename Allocator> -class array_of_doubles_union_alloc<Allocator>::builder: public tuple_base_builder<builder, array_of_doubles_union_policy_alloc<Allocator>, Allocator> { -public: - builder(const array_of_doubles_union_policy_alloc<Allocator>& policy = array_of_doubles_union_policy_alloc<Allocator>(), const Allocator& allocator = Allocator()); - array_of_doubles_union_alloc<Allocator> build() const; -}; - -// alias with default allocator -using array_of_doubles_union = array_of_doubles_union_alloc<>; - -} /* namespace datasketches */ - -#include "array_of_doubles_union_impl.hpp" - -#endif diff --git a/tuple/include/array_of_doubles_union_impl.hpp b/tuple/include/array_of_doubles_union_impl.hpp deleted file mode 100644 index c3fabf0..0000000 --- a/tuple/include/array_of_doubles_union_impl.hpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -namespace datasketches { - -template<typename A> -array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator): -Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) -{} - -template<typename A> -auto array_of_doubles_union_alloc<A>::get_result(bool ordered) const -> CompactSketch { - return compact_array_of_doubles_sketch_alloc<A>(this->state_.get_policy().get_policy().get_num_values(), Base::get_result(ordered)); -} - -// builder - -template<typename A> -array_of_doubles_union_alloc<A>::builder::builder(const Policy& policy, const A& allocator): -tuple_base_builder<builder, Policy, A>(policy, allocator) {} - -template<typename A> -array_of_doubles_union_alloc<A> array_of_doubles_union_alloc<A>::builder::build() const { - return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_); -} - -} /* namespace datasketches */ diff --git a/tuple/include/array_of_doubles_a_not_b.hpp b/tuple/include/array_tuple_a_not_b.hpp similarity index 59% rename from tuple/include/array_of_doubles_a_not_b.hpp rename to tuple/include/array_tuple_a_not_b.hpp index 67e14ca..70c5df3 100644 --- a/tuple/include/array_of_doubles_a_not_b.hpp +++ b/tuple/include/array_tuple_a_not_b.hpp @@ -17,36 +17,31 @@ * under the License. */ -#ifndef ARRAY_OF_DOUBLES_A_NOT_B_HPP_ -#define ARRAY_OF_DOUBLES_A_NOT_B_HPP_ +#ifndef ARRAY_TUPLE_A_NOT_B_HPP_ +#define ARRAY_TUPLE_A_NOT_B_HPP_ #include <vector> #include <memory> -#include "array_of_doubles_sketch.hpp" +#include "array_tuple_sketch.hpp" #include "tuple_a_not_b.hpp" namespace datasketches { -template<typename Allocator = std::allocator<double>> -class array_of_doubles_a_not_b_alloc: tuple_a_not_b<aod<Allocator>, AllocAOD<Allocator>> { +template<typename Array, typename Allocator = typename Array::allocator_type> +class array_tuple_a_not_b: tuple_a_not_b<Array, Allocator> { public: - using Summary = aod<Allocator>; - using AllocSummary = AllocAOD<Allocator>; - using Base = tuple_a_not_b<Summary, AllocSummary>; - using CompactSketch = compact_array_of_doubles_sketch_alloc<Allocator>; + using Base = tuple_a_not_b<Array, Allocator>; + using CompactSketch = compact_array_tuple_sketch<Array, Allocator>; - explicit array_of_doubles_a_not_b_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator()); + explicit array_tuple_a_not_b(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator()); template<typename FwdSketch, typename Sketch> CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const; }; -// alias with the default allocator for convenience -using array_of_doubles_a_not_b = array_of_doubles_a_not_b_alloc<>; - } /* namespace datasketches */ -#include "array_of_doubles_a_not_b_impl.hpp" +#include "array_tuple_a_not_b_impl.hpp" #endif diff --git a/tuple/include/array_of_doubles_a_not_b_impl.hpp b/tuple/include/array_tuple_a_not_b_impl.hpp similarity index 76% rename from tuple/include/array_of_doubles_a_not_b_impl.hpp rename to tuple/include/array_tuple_a_not_b_impl.hpp index d0330e2..9bd62d5 100644 --- a/tuple/include/array_of_doubles_a_not_b_impl.hpp +++ b/tuple/include/array_tuple_a_not_b_impl.hpp @@ -19,13 +19,13 @@ namespace datasketches { -template<typename A> -array_of_doubles_a_not_b_alloc<A>::array_of_doubles_a_not_b_alloc(uint64_t seed, const A& allocator): +template<typename Array, typename Allocator> +array_tuple_a_not_b<Array, Allocator>::array_tuple_a_not_b(uint64_t seed, const Allocator& allocator): Base(seed, allocator) {} -template<typename A> +template<typename Array, typename Allocator> template<typename FwdSketch, typename Sketch> -auto array_of_doubles_a_not_b_alloc<A>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch { +auto array_tuple_a_not_b<Array, Allocator>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch { return CompactSketch(a.get_num_values(), Base::compute(std::forward<FwdSketch>(a), b, ordered)); } diff --git a/tuple/include/array_of_doubles_intersection.hpp b/tuple/include/array_tuple_intersection.hpp similarity index 61% rename from tuple/include/array_of_doubles_intersection.hpp rename to tuple/include/array_tuple_intersection.hpp index 89459c3..af8768f 100644 --- a/tuple/include/array_of_doubles_intersection.hpp +++ b/tuple/include/array_tuple_intersection.hpp @@ -17,36 +17,35 @@ * under the License. */ -#ifndef ARRAY_OF_DOUBLES_INTERSECTION_HPP_ -#define ARRAY_OF_DOUBLES_INTERSECTION_HPP_ +#ifndef ARRAY_TUPLE_INTERSECTION_HPP_ +#define ARRAY_TUPLE_INTERSECTION_HPP_ #include <vector> #include <memory> -#include "array_of_doubles_sketch.hpp" +#include "array_tuple_sketch.hpp" #include "tuple_intersection.hpp" namespace datasketches { template< + typename Array, typename Policy, - typename Allocator = std::allocator<double> + typename Allocator = typename Array::allocator_type > -class array_of_doubles_intersection: public tuple_intersection<aod<Allocator>, Policy, AllocAOD<Allocator>> { +class array_tuple_intersection: public tuple_intersection<Array, Policy, Allocator> { public: - using Summary = aod<Allocator>; - using AllocSummary = AllocAOD<Allocator>; - using Base = tuple_intersection<Summary, Policy, AllocSummary>; - using CompactSketch = compact_array_of_doubles_sketch_alloc<Allocator>; + using Base = tuple_intersection<Array, Policy, Allocator>; + using CompactSketch = compact_array_tuple_sketch<Array, Allocator>; using resize_factor = theta_constants::resize_factor; - explicit array_of_doubles_intersection(uint64_t seed = DEFAULT_SEED, const Policy& policy = Policy(), const Allocator& allocator = Allocator()); + explicit array_tuple_intersection(uint64_t seed = DEFAULT_SEED, const Policy& policy = Policy(), const Allocator& allocator = Allocator()); CompactSketch get_result(bool ordered = true) const; }; } /* namespace datasketches */ -#include "array_of_doubles_intersection_impl.hpp" +#include "array_tuple_intersection_impl.hpp" #endif diff --git a/tuple/include/array_of_doubles_intersection_impl.hpp b/tuple/include/array_tuple_intersection_impl.hpp similarity index 65% rename from tuple/include/array_of_doubles_intersection_impl.hpp rename to tuple/include/array_tuple_intersection_impl.hpp index 7cd2472..34c453a 100644 --- a/tuple/include/array_of_doubles_intersection_impl.hpp +++ b/tuple/include/array_tuple_intersection_impl.hpp @@ -19,13 +19,13 @@ namespace datasketches { -template<typename P, typename A> -array_of_doubles_intersection<P, A>::array_of_doubles_intersection(uint64_t seed, const P& policy, const A& allocator): +template<typename Array, typename Policy, typename Allocator> +array_tuple_intersection<Array, Policy, Allocator>::array_tuple_intersection(uint64_t seed, const Policy& policy, const Allocator& allocator): Base(seed, policy, allocator) {} -template<typename P, typename A> -auto array_of_doubles_intersection<P, A>::get_result(bool ordered) const -> CompactSketch { - return compact_array_of_doubles_sketch_alloc<A>(this->state_.get_policy().get_policy().get_num_values(), Base::get_result(ordered)); +template<typename Array, typename Policy, typename Allocator> +auto array_tuple_intersection<Array, Policy, Allocator>::get_result(bool ordered) const -> CompactSketch { + return CompactSketch(this->state_.get_policy().get_policy().get_num_values(), Base::get_result(ordered)); } } /* namespace datasketches */ diff --git a/tuple/include/array_tuple_sketch.hpp b/tuple/include/array_tuple_sketch.hpp new file mode 100644 index 0000000..39aebc6 --- /dev/null +++ b/tuple/include/array_tuple_sketch.hpp @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef ARRAY_TUPLE_SKETCH_HPP_ +#define ARRAY_TUPLE_SKETCH_HPP_ + +#include <vector> +#include <memory> + +#include "serde.hpp" +#include "tuple_sketch.hpp" + +namespace datasketches { + +// This is a wrapper around tuple sketch to match the functionality and serialization format of ArrayOfDoublesSketch in Java. +// For this the sketch must be configured with array<double> or std::vector<double>. +// This is a more generic implementation for any integral type (serialization assumes contiguous array size_of(T) * num_values). +// A set of type definitions for the ArrayOfDoubles equivalent is provided in a separate file array_of_doubles_sketch.hpp. + +// This simple array is faster than std::vector and should be sufficient for this application +template<typename T, typename Allocator = std::allocator<T>> +class array { +public: + using value_type = T; + using allocator_type = Allocator; + + explicit array(uint8_t size, T value, const Allocator& allocator = Allocator()): + allocator_(allocator), size_(size), array_(allocator_.allocate(size_)) { + std::fill(array_, array_ + size_, value); + } + array(const array& other): + allocator_(other.allocator_), + size_(other.size_), + array_(allocator_.allocate(size_)) + { + std::copy(other.array_, other.array_ + size_, array_); + } + array(array&& other) noexcept: + allocator_(std::move(other.allocator_)), + size_(other.size_), + array_(other.array_) + { + other.array_ = nullptr; + } + ~array() { + if (array_ != nullptr) allocator_.deallocate(array_, size_); + } + array& operator=(const array& other) { + array copy(other); + std::swap(allocator_, copy.allocator_); + std::swap(size_, copy.size_); + std::swap(array_, copy.array_); + return *this; + } + array& operator=(array&& other) { + std::swap(allocator_, other.allocator_); + std::swap(size_, other.size_); + std::swap(array_, other.array_); + return *this; + } + T& operator[](size_t index) { return array_[index]; } + T operator[](size_t index) const { return array_[index]; } + uint8_t size() const { return size_; } + T* data() { return array_; } + const T* data() const { return array_; } + bool operator==(const array& other) const { + for (uint8_t i = 0; i < size_; ++i) if (array_[i] != other.array_[i]) return false; + return true; + } +private: + Allocator allocator_; + uint8_t size_; + T* array_; +}; + +/// default array tuple update policy +template<typename Array, typename Allocator = typename Array::allocator_type> +class default_array_tuple_update_policy { +public: + default_array_tuple_update_policy(uint8_t num_values = 1, const Allocator& allocator = Allocator()): + allocator_(allocator), num_values_(num_values) {} + Array create() const { + return Array(num_values_, 0, allocator_); + } + template<typename InputArray> // to allow any type with indexed access (such as double* or std::vector) + void update(Array& array, const InputArray& update) const { + for (uint8_t i = 0; i < num_values_; ++i) array[i] += update[i]; + } + uint8_t get_num_values() const { + return num_values_; + } + +private: + Allocator allocator_; + uint8_t num_values_; +}; + +// forward declaration +template<typename Array, typename Allocator> class compact_array_tuple_sketch; + +/** + * Update array tuple sketch. + * There is no constructor. Use builder instead. + */ +template< + typename Array, + typename Policy = default_array_tuple_update_policy<Array>, + typename Allocator = typename Array::allocator_type +> +class update_array_tuple_sketch: public update_tuple_sketch<Array, Array, Policy, Allocator> { +public: + using Base = update_tuple_sketch<Array, Array, Policy, Allocator>; + using resize_factor = typename Base::resize_factor; + + class builder; + + compact_array_tuple_sketch<Array, Allocator> compact(bool ordered = true) const; + + /// @return number of values in array + uint8_t get_num_values() const; + +private: + // for builder + update_array_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, + uint64_t seed, const Policy& policy, const Allocator& allocator); +}; + +/// Update array tuple sketch builder +template<typename Array, typename Policy, typename Allocator> +class update_array_tuple_sketch<Array, Policy, Allocator>::builder: public tuple_base_builder<builder, Policy, Allocator> { +public: + /** + * Constructor + * @param policy + * @param allocator + */ + builder(const Policy& policy = Policy(), const Allocator& allocator = Allocator()); + + /// @return instance of sketch + update_array_tuple_sketch build() const; +}; + +/// Compact array tuple sketch +template< + typename Array, + typename Allocator = typename Array::allocator_type +> +class compact_array_tuple_sketch: public compact_tuple_sketch<Array, Allocator> { +public: + using Base = compact_tuple_sketch<Array, Allocator>; + using Entry = typename Base::Entry; + using AllocEntry = typename Base::AllocEntry; + using AllocU64 = typename Base::AllocU64; + using vector_bytes = typename Base::vector_bytes; + + static const uint8_t SERIAL_VERSION = 1; + static const uint8_t SKETCH_FAMILY = 9; + static const uint8_t SKETCH_TYPE = 3; + enum flags { UNUSED1, UNUSED2, IS_EMPTY, HAS_ENTRIES, IS_ORDERED }; + + /** + * Copy constructor. + * Constructs a compact sketch from another sketch (update or compact) + * @param other sketch to be constructed from + * @param ordered if true make the resulting sketch ordered + */ + template<typename Sketch> + compact_array_tuple_sketch(const Sketch& other, bool ordered = true); + + /// @return number of double values in array + uint8_t get_num_values() const; + + /** + * This method serializes the sketch into a given stream in a binary form + * @param os output stream + */ + void serialize(std::ostream& os) const; + + /** + * This method serializes the sketch as a vector of bytes. + * An optional header can be reserved in front of the sketch. + * It is a blank space of a given size. + * @param header_size_bytes space to reserve in front of the sketch + * @return serialized sketch as a vector of bytes + */ + vector_bytes serialize(unsigned header_size_bytes = 0) const; + + /** + * This method deserializes a sketch from a given stream. + * @param is input stream + * @param seed the seed for the hash function that was used to create the sketch + * @param allocator instance of an Allocator + * @return an instance of the sketch + */ + static compact_array_tuple_sketch deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator()); + + /** + * This method deserializes a sketch from a given array of bytes. + * @param bytes pointer to the array of bytes + * @param size the size of the array + * @param seed the seed for the hash function that was used to create the sketch + * @param allocator instance of an Allocator + * @return an instance of the sketch + */ + static compact_array_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, + const Allocator& allocator = Allocator()); + +private: + uint8_t num_values_; + + template<typename Ar, typename P, typename Al> friend class array_tuple_union; + template<typename Ar, typename P, typename Al> friend class array_tuple_intersection; + template<typename Ar, typename Al> friend class array_tuple_a_not_b; + compact_array_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values); + compact_array_tuple_sketch(uint8_t num_values, Base&& base); +}; + +} /* namespace datasketches */ + +#include "array_tuple_sketch_impl.hpp" + +#endif diff --git a/tuple/include/array_of_doubles_sketch_impl.hpp b/tuple/include/array_tuple_sketch_impl.hpp similarity index 66% rename from tuple/include/array_of_doubles_sketch_impl.hpp rename to tuple/include/array_tuple_sketch_impl.hpp index 1f0c5dd..42b3921 100644 --- a/tuple/include/array_of_doubles_sketch_impl.hpp +++ b/tuple/include/array_tuple_sketch_impl.hpp @@ -19,56 +19,55 @@ namespace datasketches { -template<typename A> -update_array_of_doubles_sketch_alloc<A>::update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, - float p, uint64_t theta, uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator): +template<typename Array, typename Policy, typename Allocator> +update_array_tuple_sketch<Array, Policy, Allocator>::update_array_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, + float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator): Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) {} - -template<typename A> -uint8_t update_array_of_doubles_sketch_alloc<A>::get_num_values() const { +template<typename Array, typename Policy, typename Allocator> +uint8_t update_array_tuple_sketch<Array, Policy, Allocator>::get_num_values() const { return this->policy_.get_num_values(); } -template<typename A> -compact_array_of_doubles_sketch_alloc<A> update_array_of_doubles_sketch_alloc<A>::compact(bool ordered) const { - return compact_array_of_doubles_sketch_alloc<A>(*this, ordered); +template<typename Array, typename Policy, typename Allocator> +compact_array_tuple_sketch<Array, Allocator> update_array_tuple_sketch<Array, Policy, Allocator>::compact(bool ordered) const { + return compact_array_tuple_sketch<Array, Allocator>(*this, ordered); } // builder -template<typename A> -update_array_of_doubles_sketch_alloc<A>::builder::builder(const array_of_doubles_update_policy<A>& policy, const A& allocator): -tuple_base_builder<builder, array_of_doubles_update_policy<A>, A>(policy, allocator) {} +template<typename Array, typename Policy, typename Allocator> +update_array_tuple_sketch<Array, Policy, Allocator>::builder::builder(const Policy& policy, const Allocator& allocator): +tuple_base_builder<builder, Policy, Allocator>(policy, allocator) {} -template<typename A> -update_array_of_doubles_sketch_alloc<A> update_array_of_doubles_sketch_alloc<A>::builder::build() const { - return update_array_of_doubles_sketch_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_); +template<typename Array, typename Policy, typename Allocator> +auto update_array_tuple_sketch<Array, Policy, Allocator>::builder::build() const -> update_array_tuple_sketch { + return update_array_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_); } // compact sketch -template<typename A> +template<typename Array, typename Allocator> template<typename S> -compact_array_of_doubles_sketch_alloc<A>::compact_array_of_doubles_sketch_alloc(const S& other, bool ordered): +compact_array_tuple_sketch<Array, Allocator>::compact_array_tuple_sketch(const S& other, bool ordered): Base(other, ordered), num_values_(other.get_num_values()) {} -template<typename A> -compact_array_of_doubles_sketch_alloc<A>::compact_array_of_doubles_sketch_alloc(bool is_empty, bool is_ordered, +template<typename Array, typename Allocator> +compact_array_tuple_sketch<Array, Allocator>::compact_array_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values): Base(is_empty, is_ordered, seed_hash, theta, std::move(entries)), num_values_(num_values) {} -template<typename A> -compact_array_of_doubles_sketch_alloc<A>::compact_array_of_doubles_sketch_alloc(uint8_t num_values, Base&& base): +template<typename Array, typename Allocator> +compact_array_tuple_sketch<Array, Allocator>::compact_array_tuple_sketch(uint8_t num_values, Base&& base): Base(std::move(base)), num_values_(num_values) {} -template<typename A> -uint8_t compact_array_of_doubles_sketch_alloc<A>::get_num_values() const { +template<typename Array, typename Allocator> +uint8_t compact_array_tuple_sketch<Array, Allocator>::get_num_values() const { return num_values_; } -template<typename A> -void compact_array_of_doubles_sketch_alloc<A>::serialize(std::ostream& os) const { +template<typename Array, typename Allocator> +void compact_array_tuple_sketch<Array, Allocator>::serialize(std::ostream& os) const { const uint8_t preamble_longs = 1; write(os, preamble_longs); const uint8_t serial_version = SERIAL_VERSION; @@ -96,17 +95,17 @@ void compact_array_of_doubles_sketch_alloc<A>::serialize(std::ostream& os) const write(os, it.first); } for (const auto& it: this->entries_) { - write(os, it.second.data(), it.second.size() * sizeof(double)); + write(os, it.second.data(), it.second.size() * sizeof(typename Array::value_type)); } } } -template<typename A> -auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_bytes) const -> vector_bytes { +template<typename Array, typename Allocator> +auto compact_array_tuple_sketch<Array, Allocator>::serialize(unsigned header_size_bytes) const -> vector_bytes { const uint8_t preamble_longs = 1; const size_t size = header_size_bytes + 16 // preamble and theta + (this->entries_.size() > 0 ? 8 : 0) - + (sizeof(uint64_t) + sizeof(double) * num_values_) * this->entries_.size(); + + (sizeof(uint64_t) + sizeof(typename Array::value_type) * num_values_) * this->entries_.size(); vector_bytes bytes(size, 0, this->entries_.get_allocator()); uint8_t* ptr = bytes.data() + header_size_bytes; @@ -135,14 +134,14 @@ auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_by ptr += copy_to_mem(it.first, ptr); } for (const auto& it: this->entries_) { - ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(double)); + ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(typename Array::value_type)); } } return bytes; } -template<typename A> -compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) { +template<typename Array, typename Allocator> +compact_array_tuple_sketch<Array, Allocator> compact_array_tuple_sketch<Array, Allocator>::deserialize(std::istream& is, uint64_t seed, const Allocator& allocator) { read<uint8_t>(is); // unused const auto serial_version = read<uint8_t>(is); const auto family = read<uint8_t>(is); @@ -165,19 +164,19 @@ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator); read(is, keys.data(), num_entries * sizeof(uint64_t)); for (size_t i = 0; i < num_entries; ++i) { - aod<A> summary(num_values, allocator); - read(is, summary.data(), num_values * sizeof(double)); + Array summary(num_values, 0, allocator); + read(is, summary.data(), num_values * sizeof(typename Array::value_type)); entries.push_back(Entry(keys[i], std::move(summary))); } } if (!is.good()) throw std::runtime_error("error reading from std::istream"); const bool is_empty = flags_byte & (1 << flags::IS_EMPTY); const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED); - return compact_array_of_doubles_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values); + return compact_array_tuple_sketch<Array, Allocator>(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values); } -template<typename A> -compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) { +template<typename Array, typename Allocator> +compact_array_tuple_sketch<Array, Allocator> compact_array_tuple_sketch<Array, Allocator>::deserialize(const void* bytes, size_t size, uint64_t seed, const Allocator& allocator) { ensure_minimum_memory(size, 16); const char* ptr = static_cast<const char*>(bytes); ptr += sizeof(uint8_t); // unused @@ -207,19 +206,19 @@ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A uint32_t num_entries; ptr += copy_from_mem(ptr, num_entries); ptr += sizeof(uint32_t); // unused - ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(double) * num_values) * num_entries); + ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(typename Array::value_type) * num_values) * num_entries); entries.reserve(num_entries); std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator); ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * num_entries); for (size_t i = 0; i < num_entries; ++i) { - aod<A> summary(num_values, allocator); - ptr += copy_from_mem(ptr, summary.data(), num_values * sizeof(double)); + Array summary(num_values, 0, allocator); + ptr += copy_from_mem(ptr, summary.data(), num_values * sizeof(typename Array::value_type)); entries.push_back(Entry(keys[i], std::move(summary))); } } const bool is_empty = flags_byte & (1 << flags::IS_EMPTY); const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED); - return compact_array_of_doubles_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values); + return compact_array_tuple_sketch<Array, Allocator>(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values); } } /* namespace datasketches */ diff --git a/tuple/include/array_tuple_union.hpp b/tuple/include/array_tuple_union.hpp new file mode 100644 index 0000000..8f7f497 --- /dev/null +++ b/tuple/include/array_tuple_union.hpp @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef ARRAY_TUPLE_UNION_HPP_ +#define ARRAY_TUPLE_UNION_HPP_ + +#include <vector> +#include <memory> +#include "array_tuple_sketch.hpp" + +#include "tuple_union.hpp" + +namespace datasketches { + +/// default array tuple union policy +template<typename Array> +struct default_array_tuple_union_policy { + default_array_tuple_union_policy(uint8_t num_values = 1): num_values_(num_values) {} + + void operator()(Array& array, const Array& other) const { + for (uint8_t i = 0; i < num_values_; ++i) { + array[i] += other[i]; + } + } + uint8_t get_num_values() const { + return num_values_; + } +private: + uint8_t num_values_; +}; + +/// array tuple union +template< + typename Array, + typename Policy = default_array_tuple_union_policy<Array>, + typename Allocator = typename Array::allocator_type +> +class array_tuple_union: public tuple_union<Array, Policy, Allocator> { +public: + using value_type = typename Array::value_type; + using Base = tuple_union<Array, Policy, Allocator>; + using CompactSketch = compact_array_tuple_sketch<Array, Allocator>; + using resize_factor = theta_constants::resize_factor; + + class builder; + + CompactSketch get_result(bool ordered = true) const; + +private: + // for builder + array_tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator); +}; + +template<typename Array, typename Policy, typename Allocator> +class array_tuple_union<Array, Policy, Allocator>::builder: public tuple_base_builder<builder, Policy, Allocator> { +public: + builder(const Policy& policy = Policy(), const Allocator& allocator = Allocator()); + array_tuple_union build() const; +}; + +} /* namespace datasketches */ + +#include "array_tuple_union_impl.hpp" + +#endif diff --git a/tuple/include/array_tuple_union_impl.hpp b/tuple/include/array_tuple_union_impl.hpp new file mode 100644 index 0000000..ce11940 --- /dev/null +++ b/tuple/include/array_tuple_union_impl.hpp @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +namespace datasketches { + +template<typename Array, typename Policy, typename Allocator> +array_tuple_union<Array, Policy, Allocator>::array_tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator): +Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) +{} + +template<typename Array, typename Policy, typename Allocator> +auto array_tuple_union<Array, Policy, Allocator>::get_result(bool ordered) const -> CompactSketch { + return CompactSketch(this->state_.get_policy().get_policy().get_num_values(), Base::get_result(ordered)); +} + +// builder + +template<typename Array, typename Policy, typename Allocator> +array_tuple_union<Array, Policy, Allocator>::builder::builder(const Policy& policy, const Allocator& allocator): +tuple_base_builder<builder, Policy, typename Array::allocator_type>(policy, allocator) {} + +template<typename Array, typename Policy, typename Allocator> +auto array_tuple_union<Array, Policy, Allocator>::builder::build() const -> array_tuple_union { + return array_tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_); +} + +} /* namespace datasketches */ diff --git a/tuple/test/aod_sketch_deserialize_from_java_test.cpp b/tuple/test/aod_sketch_deserialize_from_java_test.cpp index 4237e22..5c0c0ce 100644 --- a/tuple/test/aod_sketch_deserialize_from_java_test.cpp +++ b/tuple/test/aod_sketch_deserialize_from_java_test.cpp @@ -19,7 +19,8 @@ #include <catch2/catch.hpp> #include <fstream> -#include <array_of_doubles_sketch.hpp> + +#include "array_of_doubles_sketch.hpp" namespace datasketches { diff --git a/tuple/test/aod_sketch_serialize_for_java.cpp b/tuple/test/aod_sketch_serialize_for_java.cpp index 4ca978a..fa17031 100644 --- a/tuple/test/aod_sketch_serialize_for_java.cpp +++ b/tuple/test/aod_sketch_serialize_for_java.cpp @@ -19,7 +19,8 @@ #include <catch2/catch.hpp> #include <fstream> -#include <array_of_doubles_sketch.hpp> + +#include "array_of_doubles_sketch.hpp" namespace datasketches { diff --git a/tuple/test/array_of_doubles_sketch_test.cpp b/tuple/test/array_of_doubles_sketch_test.cpp index bb0aa3e..e01217b 100644 --- a/tuple/test/array_of_doubles_sketch_test.cpp +++ b/tuple/test/array_of_doubles_sketch_test.cpp @@ -23,10 +23,8 @@ #include <array> #include <catch2/catch.hpp> -#include <array_of_doubles_sketch.hpp> -#include <array_of_doubles_union.hpp> -#include <array_of_doubles_intersection.hpp> -#include <array_of_doubles_a_not_b.hpp> + +#include "array_of_doubles_sketch.hpp" namespace datasketches { @@ -166,7 +164,7 @@ TEST_CASE("aod intersection: half overlap", "[tuple_sketch]") { auto update_sketch2 = update_array_of_doubles_sketch::builder().build(); for (int i = 500; i < 1500; ++i) update_sketch2.update(i, a); - array_of_doubles_intersection<array_of_doubles_union_policy> intersection; + array_of_doubles_intersection<default_array_of_doubles_union_policy> intersection; intersection.update(update_sketch1); intersection.update(update_sketch2); auto result = intersection.get_result(); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
