Repository: arrow Updated Branches: refs/heads/master e1d574c7c -> b8754eba4
ARROW-884: [C++] Exclude internal namespaces from generated Doxygen docs. This includes a fair bit of namespace scrubbing. Still lots more to do. Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #918 from wesm/ARROW-884 and squashes the following commits: 7606dc5 [Wes McKinney] Typo and cpplint fixes 451eeb1 [Wes McKinney] Restore arrow::TypePtr define 6b7e632 [Wes McKinney] Fix function capitalization 97433bb [Wes McKinney] Exclude internal namespaces from generated Doxygen docs. Various cleanups of current API page to exclude internal details Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/b8754eba Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/b8754eba Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/b8754eba Branch: refs/heads/master Commit: b8754eba4683e7300a751f60a2fc1eef152cea1d Parents: e1d574c Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Tue Aug 1 15:52:43 2017 +0200 Committer: Uwe L. 
Korn <uw...@xhochy.com> Committed: Tue Aug 1 15:52:43 2017 +0200 ---------------------------------------------------------------------- cpp/apidoc/Doxyfile | 53 ++++-------- cpp/build-support/run_clang_format.py | 8 +- cpp/src/arrow/api.h | 3 + cpp/src/arrow/array-test.cc | 8 +- cpp/src/arrow/array.cc | 6 +- cpp/src/arrow/builder.cc | 102 ++++++++++++----------- cpp/src/arrow/builder.h | 58 +++++++------ cpp/src/arrow/compare.cc | 115 +++++++++++++------------- cpp/src/arrow/io/hdfs-internal.cc | 2 + cpp/src/arrow/io/hdfs-internal.h | 2 + cpp/src/arrow/io/hdfs.cc | 14 ++-- cpp/src/arrow/io/io-hdfs-test.cc | 2 +- cpp/src/arrow/io/memory.cc | 4 +- cpp/src/arrow/ipc/feather-internal.h | 4 +- cpp/src/arrow/ipc/feather.h | 4 +- cpp/src/arrow/ipc/ipc-read-write-test.cc | 2 +- cpp/src/arrow/ipc/test-common.h | 2 +- cpp/src/arrow/python/arrow_to_pandas.cc | 19 +++-- cpp/src/arrow/python/pandas_to_arrow.cc | 26 +++--- cpp/src/arrow/python/type_traits.h | 4 +- cpp/src/arrow/table.cc | 5 +- cpp/src/arrow/test-common.h | 4 +- cpp/src/arrow/type.cc | 5 +- cpp/src/arrow/type.h | 44 ++++++---- cpp/src/arrow/type_traits.h | 14 ++-- cpp/src/arrow/util/bit-stream-utils.h | 21 +++-- cpp/src/arrow/util/bit-util.h | 8 +- cpp/src/arrow/util/bpacking.h | 13 ++- cpp/src/arrow/util/memory.h | 2 + cpp/src/arrow/util/random.h | 23 +++--- cpp/src/arrow/util/stl-util-test.cc | 2 + cpp/src/arrow/util/stl.h | 2 + cpp/src/arrow/visitor.cc | 4 + cpp/src/arrow/visitor_inline.h | 2 + 34 files changed, 314 insertions(+), 273 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/apidoc/Doxyfile ---------------------------------------------------------------------- diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile index 3127662..f32ad54 100644 --- a/cpp/apidoc/Doxyfile +++ b/cpp/apidoc/Doxyfile @@ -833,50 +833,17 @@ INPUT_ENCODING = UTF-8 # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, 
*.f03, *.f08, # *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf. -FILE_PATTERNS = *.c \ - *.cc \ - *.cxx \ - *.cpp \ - *.c++ \ - *.java \ - *.ii \ - *.ixx \ - *.ipp \ - *.i++ \ - *.inl \ - *.idl \ - *.ddl \ - *.odl \ - *.h \ +FILE_PATTERNS = *.h \ *.hh \ *.hxx \ *.hpp \ - *.h++ \ - *.cs \ - *.d \ - *.php \ - *.php4 \ - *.php5 \ - *.phtml \ *.inc \ *.m \ *.markdown \ *.md \ *.mm \ *.dox \ - *.py \ - *.pyw \ - *.f90 \ - *.f95 \ - *.f03 \ - *.f08 \ - *.f \ - *.for \ - *.tcl \ - *.vhd \ - *.vhdl \ - *.ucf \ - *.qsf + *.py # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. @@ -908,6 +875,7 @@ EXCLUDE_SYMLINKS = NO # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = *-test.cc \ + *test* \ *_generated.h \ *-benchmark.cc @@ -920,7 +888,11 @@ EXCLUDE_PATTERNS = *-test.cc \ # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = +EXCLUDE_SYMBOLS = detail +EXCLUDE_SYMBOLS += internal +EXCLUDE_SYMBOLS += _* +EXCLUDE_SYMBOLS += BitUtil +EXCLUDE_SYMBOLS += SSEUtil # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include @@ -2060,7 +2032,7 @@ ENABLE_PREPROCESSING = YES # The default value is: NO. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -MACRO_EXPANSION = NO +MACRO_EXPANSION = YES # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then # the macro expansion is limited to the macros specified with the PREDEFINED and @@ -2068,7 +2040,7 @@ MACRO_EXPANSION = NO # The default value is: NO. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -EXPAND_ONLY_PREDEF = NO +EXPAND_ONLY_PREDEF = YES # If the SEARCH_INCLUDES tag is set to YES, the include files in the # INCLUDE_PATH will be searched if a #include is found. 
@@ -2100,7 +2072,10 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = __attribute__(x)= \ + __declspec(x)= \ + ARROW_EXPORT= \ + ARROW_EXTERN_TEMPLATE= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/build-support/run_clang_format.py ---------------------------------------------------------------------- diff --git a/cpp/build-support/run_clang_format.py b/cpp/build-support/run_clang_format.py index ab800e6..ac4954c 100755 --- a/cpp/build-support/run_clang_format.py +++ b/cpp/build-support/run_clang_format.py @@ -57,5 +57,9 @@ for directory, subdirs, files in os.walk(SOURCE_DIR): # exit 1 # fi -subprocess.check_output([CLANG_FORMAT, '-i'] + files_to_format, - stderr=subprocess.STDOUT) +try: + subprocess.check_output([CLANG_FORMAT, '-i'] + files_to_format, + stderr=subprocess.STDOUT) +except Exception as e: + print(e) + raise http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/api.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h index 731f239..4d731bd 100644 --- a/cpp/src/arrow/api.h +++ b/cpp/src/arrow/api.h @@ -32,4 +32,7 @@ #include "arrow/type.h" #include "arrow/visitor.h" +/// \brief Top-level namespace for Apache Arrow C++ API +namespace arrow {} + #endif // ARROW_API_H http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/array-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index 5d63d92..0efb51c 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -2042,9 +2042,9 @@ class TestStructBuilder : public 
TestBuilder { auto list_type = list(char_type); vector<std::shared_ptr<DataType>> types = {list_type, int32_type}; - vector<FieldPtr> fields; - fields.push_back(FieldPtr(new Field("list", list_type))); - fields.push_back(FieldPtr(new Field("int", int32_type))); + vector<std::shared_ptr<Field>> fields; + fields.push_back(field("list", list_type)); + fields.push_back(field("int", int32_type)); type_ = struct_(fields); value_fields_ = fields; @@ -2062,7 +2062,7 @@ class TestStructBuilder : public TestBuilder { } protected: - vector<FieldPtr> value_fields_; + vector<std::shared_ptr<Field>> value_fields_; std::shared_ptr<DataType> type_; std::shared_ptr<StructBuilder> builder_; http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/array.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index 61791c9..ab0be7a 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -527,6 +527,8 @@ Status Array::Accept(ArrayVisitor* visitor) const { // ---------------------------------------------------------------------- // Implement Array::Validate as inline visitor +namespace internal { + struct ValidateVisitor { Status Visit(const NullArray& array) { return Status::OK(); } @@ -658,8 +660,10 @@ struct ValidateVisitor { } }; +} // namespace internal + Status ValidateArray(const Array& array) { - ValidateVisitor validate_visitor; + internal::ValidateVisitor validate_visitor; return VisitArrayInline(array, &validate_visitor); } http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/builder.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index d3a299e..391204f 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -37,6 +37,10 @@ namespace arrow { +using internal::AdaptiveIntBuilderBase; +using internal::ArrayData; +using internal::WrappedBinary; + 
Status ArrayBuilder::AppendToBitmap(bool is_valid) { if (length_ == capacity_) { // If the capacity was not already a multiple of 2, do so here @@ -338,7 +342,7 @@ Status AdaptiveIntBuilder::Append(const int64_t* values, int64_t length, uint8_t new_int_size = int_size_; for (int64_t i = 0; i < length; i++) { if (valid_bytes == nullptr || valid_bytes[i]) { - new_int_size = expanded_int_size(values[i], new_int_size); + new_int_size = internal::ExpandedIntSize(values[i], new_int_size); } } if (new_int_size != int_size_) { @@ -495,7 +499,7 @@ Status AdaptiveUIntBuilder::Append(const uint64_t* values, int64_t length, uint8_t new_int_size = int_size_; for (int64_t i = 0; i < length; i++) { if (valid_bytes == nullptr || valid_bytes[i]) { - new_int_size = expanded_uint_size(values[i], new_int_size); + new_int_size = internal::ExpandedUIntSize(values[i], new_int_size); } } if (new_int_size != int_size_) { @@ -861,48 +865,47 @@ Status DictionaryBuilder<T>::AppendDictionary(const Scalar& value) { return dict_builder_.Append(value); } -#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \ - template <> \ - internal::WrappedBinary DictionaryBuilder<Type>::GetDictionaryValue(int64_t index) { \ - int32_t v_len; \ - const uint8_t* v = dict_builder_.GetValue(static_cast<int64_t>(index), &v_len); \ - return internal::WrappedBinary(v, v_len); \ - } \ - \ - template <> \ - Status DictionaryBuilder<Type>::AppendDictionary( \ - const internal::WrappedBinary& value) { \ - return dict_builder_.Append(value.ptr_, value.length_); \ - } \ - \ - template <> \ - Status DictionaryBuilder<Type>::AppendArray(const Array& array) { \ - const BinaryArray& binary_array = static_cast<const BinaryArray&>(array); \ - internal::WrappedBinary value(nullptr, 0); \ - for (int64_t i = 0; i < array.length(); i++) { \ - if (array.IsNull(i)) { \ - RETURN_NOT_OK(AppendNull()); \ - } else { \ - value.ptr_ = binary_array.GetValue(i, &value.length_); \ - RETURN_NOT_OK(Append(value)); \ - } \ - } \ - return 
Status::OK(); \ - } \ - \ - template <> \ - int DictionaryBuilder<Type>::HashValue(const internal::WrappedBinary& value) { \ - return HashUtil::Hash(value.ptr_, value.length_, 0); \ - } \ - \ - template <> \ - bool DictionaryBuilder<Type>::SlotDifferent(hash_slot_t index, \ - const internal::WrappedBinary& value) { \ - int32_t other_length; \ - const uint8_t* other_value = \ - dict_builder_.GetValue(static_cast<int64_t>(index), &other_length); \ - return !(other_length == value.length_ && \ - 0 == memcmp(other_value, value.ptr_, value.length_)); \ +#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \ + template <> \ + WrappedBinary DictionaryBuilder<Type>::GetDictionaryValue(int64_t index) { \ + int32_t v_len; \ + const uint8_t* v = dict_builder_.GetValue(static_cast<int64_t>(index), &v_len); \ + return WrappedBinary(v, v_len); \ + } \ + \ + template <> \ + Status DictionaryBuilder<Type>::AppendDictionary(const WrappedBinary& value) { \ + return dict_builder_.Append(value.ptr_, value.length_); \ + } \ + \ + template <> \ + Status DictionaryBuilder<Type>::AppendArray(const Array& array) { \ + const BinaryArray& binary_array = static_cast<const BinaryArray&>(array); \ + WrappedBinary value(nullptr, 0); \ + for (int64_t i = 0; i < array.length(); i++) { \ + if (array.IsNull(i)) { \ + RETURN_NOT_OK(AppendNull()); \ + } else { \ + value.ptr_ = binary_array.GetValue(i, &value.length_); \ + RETURN_NOT_OK(Append(value)); \ + } \ + } \ + return Status::OK(); \ + } \ + \ + template <> \ + int DictionaryBuilder<Type>::HashValue(const WrappedBinary& value) { \ + return HashUtil::Hash(value.ptr_, value.length_, 0); \ + } \ + \ + template <> \ + bool DictionaryBuilder<Type>::SlotDifferent(hash_slot_t index, \ + const WrappedBinary& value) { \ + int32_t other_length; \ + const uint8_t* other_value = \ + dict_builder_.GetValue(static_cast<int64_t>(index), &other_length); \ + return !(other_length == value.length_ && \ + 0 == memcmp(other_value, value.ptr_, value.length_)); \ } 
BINARY_DICTIONARY_SPECIALIZATIONS(StringType); @@ -1132,7 +1135,7 @@ Status BinaryBuilder::AppendNull() { return Status::OK(); } -Status BinaryBuilder::FinishInternal(std::shared_ptr<internal::ArrayData>* out) { +Status BinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) { // Write final offset (values length) RETURN_NOT_OK(AppendNextOffset()); std::shared_ptr<Buffer> offsets, value_data; @@ -1141,13 +1144,12 @@ Status BinaryBuilder::FinishInternal(std::shared_ptr<internal::ArrayData>* out) RETURN_NOT_OK(value_data_builder_.Finish(&value_data)); BufferVector buffers = {null_bitmap_, offsets, value_data}; - *out = std::make_shared<internal::ArrayData>(type_, length_, std::move(buffers), - null_count_, 0); + *out = std::make_shared<ArrayData>(type_, length_, std::move(buffers), null_count_, 0); return Status::OK(); } Status BinaryBuilder::Finish(std::shared_ptr<Array>* out) { - std::shared_ptr<internal::ArrayData> data; + std::shared_ptr<ArrayData> data; RETURN_NOT_OK(FinishInternal(&data)); *out = std::make_shared<BinaryArray>(data); Reset(); @@ -1174,7 +1176,7 @@ const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const { StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(pool, utf8()) {} Status StringBuilder::Finish(std::shared_ptr<Array>* out) { - std::shared_ptr<internal::ArrayData> data; + std::shared_ptr<ArrayData> data; RETURN_NOT_OK(FinishInternal(&data)); *out = std::make_shared<StringArray>(data); Reset(); @@ -1299,7 +1301,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type, } case Type::STRUCT: { - const std::vector<FieldPtr>& fields = type->children(); + const std::vector<std::shared_ptr<Field>>& fields = type->children(); std::vector<std::unique_ptr<ArrayBuilder>> values_builder; for (auto it : fields) { http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/builder.h ---------------------------------------------------------------------- diff --git 
a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 080a329..009fd7a 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -262,6 +262,8 @@ using HalfFloatBuilder = NumericBuilder<HalfFloatType>; using FloatBuilder = NumericBuilder<FloatType>; using DoubleBuilder = NumericBuilder<DoubleType>; +namespace internal { + class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder { public: explicit AdaptiveIntBuilderBase(MemoryPool* pool); @@ -295,7 +297,29 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder { }; // Check if we would need to expand the underlying storage type -inline uint8_t expanded_uint_size(uint64_t val, uint8_t current_int_size) { +inline uint8_t ExpandedIntSize(int64_t val, uint8_t current_int_size) { + if (current_int_size == 8 || + (current_int_size < 8 && + (val > static_cast<int64_t>(std::numeric_limits<int32_t>::max()) || + val < static_cast<int64_t>(std::numeric_limits<int32_t>::min())))) { + return 8; + } else if (current_int_size == 4 || + (current_int_size < 4 && + (val > static_cast<int64_t>(std::numeric_limits<int16_t>::max()) || + val < static_cast<int64_t>(std::numeric_limits<int16_t>::min())))) { + return 4; + } else if (current_int_size == 2 || + (current_int_size == 1 && + (val > static_cast<int64_t>(std::numeric_limits<int8_t>::max()) || + val < static_cast<int64_t>(std::numeric_limits<int8_t>::min())))) { + return 2; + } else { + return 1; + } +} + +// Check if we would need to expand the underlying storage type +inline uint8_t ExpandedUIntSize(uint64_t val, uint8_t current_int_size) { if (current_int_size == 8 || (current_int_size < 8 && (val > static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))) { @@ -313,7 +337,9 @@ inline uint8_t expanded_uint_size(uint64_t val, uint8_t current_int_size) { } } -class ARROW_EXPORT AdaptiveUIntBuilder : public AdaptiveIntBuilderBase { +} // namespace internal + +class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase { 
public: explicit AdaptiveUIntBuilder(MemoryPool* pool); @@ -324,7 +350,7 @@ class ARROW_EXPORT AdaptiveUIntBuilder : public AdaptiveIntBuilderBase { RETURN_NOT_OK(Reserve(1)); BitUtil::SetBit(null_bitmap_data_, length_); - uint8_t new_int_size = expanded_uint_size(val, int_size_); + uint8_t new_int_size = internal::ExpandedUIntSize(val, int_size_); if (new_int_size != int_size_) { RETURN_NOT_OK(ExpandIntSize(new_int_size)); } @@ -372,29 +398,7 @@ class ARROW_EXPORT AdaptiveUIntBuilder : public AdaptiveIntBuilderBase { Status ExpandIntSizeN(); }; -// Check if we would need to expand the underlying storage type -inline uint8_t expanded_int_size(int64_t val, uint8_t current_int_size) { - if (current_int_size == 8 || - (current_int_size < 8 && - (val > static_cast<int64_t>(std::numeric_limits<int32_t>::max()) || - val < static_cast<int64_t>(std::numeric_limits<int32_t>::min())))) { - return 8; - } else if (current_int_size == 4 || - (current_int_size < 4 && - (val > static_cast<int64_t>(std::numeric_limits<int16_t>::max()) || - val < static_cast<int64_t>(std::numeric_limits<int16_t>::min())))) { - return 4; - } else if (current_int_size == 2 || - (current_int_size == 1 && - (val > static_cast<int64_t>(std::numeric_limits<int8_t>::max()) || - val < static_cast<int64_t>(std::numeric_limits<int8_t>::min())))) { - return 2; - } else { - return 1; - } -} - -class ARROW_EXPORT AdaptiveIntBuilder : public AdaptiveIntBuilderBase { +class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase { public: explicit AdaptiveIntBuilder(MemoryPool* pool); @@ -405,7 +409,7 @@ class ARROW_EXPORT AdaptiveIntBuilder : public AdaptiveIntBuilderBase { RETURN_NOT_OK(Reserve(1)); BitUtil::SetBit(null_bitmap_data_, length_); - uint8_t new_int_size = expanded_int_size(val, int_size_); + uint8_t new_int_size = internal::ExpandedIntSize(val, int_size_); if (new_int_size != int_size_) { RETURN_NOT_OK(ExpandIntSize(new_int_size)); } 
http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/compare.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index da10c2a..dda5fdd 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -38,6 +38,8 @@ namespace arrow { // ---------------------------------------------------------------------- // Public method implementations +namespace internal { + class RangeEqualsVisitor { public: RangeEqualsVisitor(const Array& right, int64_t left_start_idx, int64_t left_end_idx, @@ -673,63 +675,6 @@ inline Status ArrayEqualsImpl(const Array& left, const Array& right, bool* are_e return Status::OK(); } -Status ArrayEquals(const Array& left, const Array& right, bool* are_equal) { - return ArrayEqualsImpl<ArrayEqualsVisitor>(left, right, are_equal); -} - -Status ArrayApproxEquals(const Array& left, const Array& right, bool* are_equal) { - return ArrayEqualsImpl<ApproxEqualsVisitor>(left, right, are_equal); -} - -Status ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_idx, - int64_t left_end_idx, int64_t right_start_idx, bool* are_equal) { - if (&left == &right) { - *are_equal = true; - } else if (left.type_id() != right.type_id()) { - *are_equal = false; - } else if (left.length() == 0) { - *are_equal = true; - } else { - RangeEqualsVisitor visitor(right, left_start_idx, left_end_idx, right_start_idx); - RETURN_NOT_OK(VisitArrayInline(left, &visitor)); - *are_equal = visitor.result(); - } - return Status::OK(); -} - -// ---------------------------------------------------------------------- -// Implement TensorEquals - -Status TensorEquals(const Tensor& left, const Tensor& right, bool* are_equal) { - // The arrays are the same object - if (&left == &right) { - *are_equal = true; - } else if (left.type_id() != right.type_id()) { - *are_equal = false; - } else if (left.size() == 0) { - *are_equal = true; - } else { - if 
(!left.is_contiguous() || !right.is_contiguous()) { - return Status::NotImplemented( - "Comparison not implemented for non-contiguous tensors"); - } - - const auto& size_meta = dynamic_cast<const FixedWidthType&>(*left.type()); - const int byte_width = size_meta.bit_width() / 8; - DCHECK_GT(byte_width, 0); - - const uint8_t* left_data = left.data()->data(); - const uint8_t* right_data = right.data()->data(); - - *are_equal = - memcmp(left_data, right_data, static_cast<size_t>(byte_width * left.size())) == 0; - } - return Status::OK(); -} - -// ---------------------------------------------------------------------- -// Implement TypeEquals - class TypeEqualsVisitor { public: explicit TypeEqualsVisitor(const DataType& right) : right_(right), result_(false) {} @@ -835,6 +780,60 @@ class TypeEqualsVisitor { bool result_; }; +} // namespace internal + +Status ArrayEquals(const Array& left, const Array& right, bool* are_equal) { + return internal::ArrayEqualsImpl<internal::ArrayEqualsVisitor>(left, right, are_equal); +} + +Status ArrayApproxEquals(const Array& left, const Array& right, bool* are_equal) { + return internal::ArrayEqualsImpl<internal::ApproxEqualsVisitor>(left, right, are_equal); +} + +Status ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_idx, + int64_t left_end_idx, int64_t right_start_idx, bool* are_equal) { + if (&left == &right) { + *are_equal = true; + } else if (left.type_id() != right.type_id()) { + *are_equal = false; + } else if (left.length() == 0) { + *are_equal = true; + } else { + internal::RangeEqualsVisitor visitor(right, left_start_idx, left_end_idx, + right_start_idx); + RETURN_NOT_OK(VisitArrayInline(left, &visitor)); + *are_equal = visitor.result(); + } + return Status::OK(); +} + +Status TensorEquals(const Tensor& left, const Tensor& right, bool* are_equal) { + // The arrays are the same object + if (&left == &right) { + *are_equal = true; + } else if (left.type_id() != right.type_id()) { + *are_equal = false; 
+ } else if (left.size() == 0) { + *are_equal = true; + } else { + if (!left.is_contiguous() || !right.is_contiguous()) { + return Status::NotImplemented( + "Comparison not implemented for non-contiguous tensors"); + } + + const auto& size_meta = dynamic_cast<const FixedWidthType&>(*left.type()); + const int byte_width = size_meta.bit_width() / 8; + DCHECK_GT(byte_width, 0); + + const uint8_t* left_data = left.data()->data(); + const uint8_t* right_data = right.data()->data(); + + *are_equal = + memcmp(left_data, right_data, static_cast<size_t>(byte_width * left.size())) == 0; + } + return Status::OK(); +} + Status TypeEquals(const DataType& left, const DataType& right, bool* are_equal) { // The arrays are the same object if (&left == &right) { @@ -842,7 +841,7 @@ Status TypeEquals(const DataType& left, const DataType& right, bool* are_equal) } else if (left.id() != right.id()) { *are_equal = false; } else { - TypeEqualsVisitor visitor(right); + internal::TypeEqualsVisitor visitor(right); RETURN_NOT_OK(VisitTypeInline(left, &visitor)); *are_equal = visitor.result(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/io/hdfs-internal.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/io/hdfs-internal.cc b/cpp/src/arrow/io/hdfs-internal.cc index fd7417b..35657df 100644 --- a/cpp/src/arrow/io/hdfs-internal.cc +++ b/cpp/src/arrow/io/hdfs-internal.cc @@ -282,6 +282,7 @@ static inline void* GetLibrarySymbol(void* handle, const char* symbol) { namespace arrow { namespace io { +namespace internal { static LibHdfsShim libhdfs_shim; static LibHdfsShim libhdfs3_shim; @@ -556,5 +557,6 @@ Status ConnectLibHdfs3(LibHdfsShim** driver) { return shim->GetRequiredSymbols(); } +} // namespace internal } // namespace io } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/io/hdfs-internal.h ---------------------------------------------------------------------- diff --git 
a/cpp/src/arrow/io/hdfs-internal.h b/cpp/src/arrow/io/hdfs-internal.h index db6a21c..f2de00d 100644 --- a/cpp/src/arrow/io/hdfs-internal.h +++ b/cpp/src/arrow/io/hdfs-internal.h @@ -32,6 +32,7 @@ namespace arrow { class Status; namespace io { +namespace internal { // NOTE(wesm): cpplint does not like use of short and other imprecise C types struct LibHdfsShim { @@ -205,6 +206,7 @@ struct LibHdfsShim { Status ARROW_EXPORT ConnectLibHdfs(LibHdfsShim** driver); Status ARROW_EXPORT ConnectLibHdfs3(LibHdfsShim** driver); +} // namespace internal } // namespace io } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/io/hdfs.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/io/hdfs.cc b/cpp/src/arrow/io/hdfs.cc index 254e483..ba446b5 100644 --- a/cpp/src/arrow/io/hdfs.cc +++ b/cpp/src/arrow/io/hdfs.cc @@ -61,7 +61,7 @@ static constexpr int kDefaultHdfsBufferSize = 1 << 16; class HdfsAnyFileImpl { public: - void set_members(const std::string& path, LibHdfsShim* driver, hdfsFS fs, + void set_members(const std::string& path, internal::LibHdfsShim* driver, hdfsFS fs, hdfsFile handle) { path_ = path; driver_ = driver; @@ -88,7 +88,7 @@ class HdfsAnyFileImpl { protected: std::string path_; - LibHdfsShim* driver_; + internal::LibHdfsShim* driver_; // For threadsafety std::mutex lock_; @@ -507,7 +507,7 @@ class HadoopFileSystem::HadoopFileSystemImpl { } private: - LibHdfsShim* driver_; + internal::LibHdfsShim* driver_; std::string namenode_host_; std::string user_; @@ -613,13 +613,13 @@ Status HadoopFileSystem::Rename(const std::string& src, const std::string& dst) // Allow public API users to check whether we are set up correctly Status HaveLibHdfs() { - LibHdfsShim* driver; - return ConnectLibHdfs(&driver); + internal::LibHdfsShim* driver; + return internal::ConnectLibHdfs(&driver); } Status HaveLibHdfs3() { - LibHdfsShim* driver; - return ConnectLibHdfs3(&driver); + 
internal::LibHdfsShim* driver; + return internal::ConnectLibHdfs3(&driver); } } // namespace io http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/io/io-hdfs-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/io/io-hdfs-test.cc b/cpp/src/arrow/io/io-hdfs-test.cc index 92f4291..b6a40e0 100644 --- a/cpp/src/arrow/io/io-hdfs-test.cc +++ b/cpp/src/arrow/io/io-hdfs-test.cc @@ -84,7 +84,7 @@ class TestHadoopFileSystem : public ::testing::Test { // Set up shared state between unit tests void SetUp() { - LibHdfsShim* driver_shim; + internal::LibHdfsShim* driver_shim; client_ = nullptr; scratch_dir_ = http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/io/memory.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc index b6c48ec..50f3ddf 100644 --- a/cpp/src/arrow/io/memory.cc +++ b/cpp/src/arrow/io/memory.cc @@ -157,8 +157,8 @@ Status FixedSizeBufferWriter::Tell(int64_t* position) { Status FixedSizeBufferWriter::Write(const uint8_t* data, int64_t nbytes) { if (nbytes > memcopy_threshold_ && memcopy_num_threads_ > 1) { - parallel_memcopy(mutable_data_ + position_, data, nbytes, memcopy_blocksize_, - memcopy_num_threads_); + internal::parallel_memcopy(mutable_data_ + position_, data, nbytes, + memcopy_blocksize_, memcopy_num_threads_); } else { memcpy(mutable_data_ + position_, data, nbytes); } http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/ipc/feather-internal.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/feather-internal.h b/cpp/src/arrow/ipc/feather-internal.h index 36cfecc..1b5924e 100644 --- a/cpp/src/arrow/ipc/feather-internal.h +++ b/cpp/src/arrow/ipc/feather-internal.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-/// Public API for the "Feather" file format, originally created at -/// http://github.com/wesm/feather +// Public API for the "Feather" file format, originally created at +// http://github.com/wesm/feather #ifndef ARROW_IPC_FEATHER_INTERNAL_H #define ARROW_IPC_FEATHER_INTERNAL_H http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/ipc/feather.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/feather.h b/cpp/src/arrow/ipc/feather.h index 8abcb5c..2ab35a9 100644 --- a/cpp/src/arrow/ipc/feather.h +++ b/cpp/src/arrow/ipc/feather.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -/// Public API for the "Feather" file format, originally created at -/// http://github.com/wesm/feather +// Public API for the "Feather" file format, originally created at +// http://github.com/wesm/feather #ifndef ARROW_IPC_FEATHER_H #define ARROW_IPC_FEATHER_H http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/ipc/ipc-read-write-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/ipc-read-write-test.cc b/cpp/src/arrow/ipc/ipc-read-write-test.cc index 6c70517..a6246c9 100644 --- a/cpp/src/arrow/ipc/ipc-read-write-test.cc +++ b/cpp/src/arrow/ipc/ipc-read-write-test.cc @@ -409,7 +409,7 @@ class RecursionLimits : public ::testing::Test, public io::MemoryMapFixture { int64_t* body_length, std::shared_ptr<RecordBatch>* batch, std::shared_ptr<Schema>* schema) { const int batch_length = 5; - TypePtr type = int32(); + auto type = int32(); std::shared_ptr<Array> array; const bool include_nulls = true; RETURN_NOT_OK(MakeRandomInt32Array(1000, include_nulls, pool_, &array)); http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/ipc/test-common.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/test-common.h 
b/cpp/src/arrow/ipc/test-common.h index a876792..cb82737 100644 --- a/cpp/src/arrow/ipc/test-common.h +++ b/cpp/src/arrow/ipc/test-common.h @@ -333,7 +333,7 @@ Status MakeNonNullRecordBatch(std::shared_ptr<RecordBatch>* out) { Status MakeDeeplyNestedList(std::shared_ptr<RecordBatch>* out) { const int batch_length = 5; - TypePtr type = int32(); + auto type = int32(); MemoryPool* pool = default_memory_pool(); std::shared_ptr<Array> array; http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/python/arrow_to_pandas.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc index 462bdb7..86f82fd 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.cc +++ b/cpp/src/arrow/python/arrow_to_pandas.cc @@ -56,6 +56,9 @@ namespace arrow { namespace py { +using internal::kPandasTimestampNull; +using internal::kNanosecondsInDay; + // ---------------------------------------------------------------------- // Utility code @@ -752,7 +755,7 @@ class IntBlock : public PandasBlock { public: using PandasBlock::PandasBlock; Status Allocate() override { - return AllocateNDArray(arrow_traits<ARROW_TYPE>::npy_type); + return AllocateNDArray(internal::arrow_traits<ARROW_TYPE>::npy_type); } Status Write(const std::shared_ptr<Column>& col, int64_t abs_placement, @@ -981,7 +984,7 @@ class CategoricalBlock : public PandasBlock { public: explicit CategoricalBlock(int64_t num_rows) : PandasBlock(num_rows, 1) {} Status Allocate() override { - constexpr int npy_type = arrow_traits<ARROW_INDEX_TYPE>::npy_type; + constexpr int npy_type = internal::arrow_traits<ARROW_INDEX_TYPE>::npy_type; if (!(npy_type == NPY_INT8 || npy_type == NPY_INT16 || npy_type == NPY_INT32 || npy_type == NPY_INT64)) { @@ -992,7 +995,7 @@ class CategoricalBlock : public PandasBlock { Status Write(const std::shared_ptr<Column>& col, int64_t abs_placement, int64_t rel_placement) override { - using T = 
typename arrow_traits<ARROW_INDEX_TYPE>::T; + using T = typename internal::arrow_traits<ARROW_INDEX_TYPE>::T; T* out_values = reinterpret_cast<T*>(block_data_) + rel_placement * num_rows_; @@ -1381,7 +1384,7 @@ class ArrowDeserializer { template <int TYPE> Status ConvertValuesZeroCopy(int npy_type, std::shared_ptr<Array> arr) { - typedef typename arrow_traits<TYPE>::T T; + typedef typename internal::arrow_traits<TYPE>::T T; auto prim_arr = static_cast<PrimitiveArray*>(arr.get()); auto in_values = reinterpret_cast<const T*>(prim_arr->raw_values()); @@ -1425,7 +1428,7 @@ class ArrowDeserializer { typename std::enable_if<std::is_base_of<FloatingPoint, Type>::value, Status>::type Visit(const Type& type) { constexpr int TYPE = Type::type_id; - using traits = arrow_traits<TYPE>; + using traits = internal::arrow_traits<TYPE>; typedef typename traits::T T; int npy_type = traits::npy_type; @@ -1447,7 +1450,7 @@ class ArrowDeserializer { Status>::type Visit(const Type& type) { constexpr int TYPE = Type::type_id; - using traits = arrow_traits<TYPE>; + using traits = internal::arrow_traits<TYPE>; typedef typename traits::T T; @@ -1480,7 +1483,7 @@ class ArrowDeserializer { typename std::enable_if<std::is_base_of<Integer, Type>::value, Status>::type Visit( const Type& type) { constexpr int TYPE = Type::type_id; - using traits = arrow_traits<TYPE>; + using traits = internal::arrow_traits<TYPE>; typedef typename traits::T T; @@ -1535,7 +1538,7 @@ class ArrowDeserializer { if (data_.null_count() > 0) { return VisitObjects(ConvertBooleanWithNulls); } else { - RETURN_NOT_OK(AllocateOutput(arrow_traits<Type::BOOL>::npy_type)); + RETURN_NOT_OK(AllocateOutput(internal::arrow_traits<Type::BOOL>::npy_type)); auto out_values = reinterpret_cast<uint8_t*>(PyArray_DATA(arr_)); ConvertBooleanNoNulls(data_, out_values); } http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/python/pandas_to_arrow.cc ---------------------------------------------------------------------- diff 
--git a/cpp/src/arrow/python/pandas_to_arrow.cc b/cpp/src/arrow/python/pandas_to_arrow.cc index be5634b..2fbed1b 100644 --- a/cpp/src/arrow/python/pandas_to_arrow.cc +++ b/cpp/src/arrow/python/pandas_to_arrow.cc @@ -50,8 +50,14 @@ #include "arrow/python/util/datetime.h" namespace arrow { + +using internal::ArrayData; +using internal::MakeArray; + namespace py { +using internal::NumPyTypeSize; + // ---------------------------------------------------------------------- // Conversion utilities @@ -83,7 +89,7 @@ static inline bool PyObject_is_integer(const PyObject* obj) { template <int TYPE> static int64_t ValuesToBitmap(PyArrayObject* arr, uint8_t* bitmap) { - typedef npy_traits<TYPE> traits; + typedef internal::npy_traits<TYPE> traits; typedef typename traits::value_type T; int64_t null_count = 0; @@ -120,7 +126,7 @@ static int64_t MaskToBitmap(PyArrayObject* mask, int64_t length, uint8_t* bitmap template <int TYPE> static int64_t ValuesToValidBytes(const void* data, int64_t length, uint8_t* valid_bytes) { - typedef npy_traits<TYPE> traits; + typedef internal::npy_traits<TYPE> traits; typedef typename traits::value_type T; int64_t null_count = 0; @@ -306,16 +312,16 @@ class PandasConverter { return Status::OK(); } - Status PushArray(const std::shared_ptr<internal::ArrayData>& data) { + Status PushArray(const std::shared_ptr<ArrayData>& data) { std::shared_ptr<Array> result; - RETURN_NOT_OK(internal::MakeArray(data, &result)); + RETURN_NOT_OK(MakeArray(data, &result)); out_arrays_.emplace_back(std::move(result)); return Status::OK(); } template <typename ArrowType> Status VisitNative() { - using traits = arrow_traits<ArrowType::type_id>; + using traits = internal::arrow_traits<ArrowType::type_id>; if (mask_ != nullptr || traits::supports_nulls) { RETURN_NOT_OK(InitNullBitmap()); @@ -334,8 +340,8 @@ class PandasConverter { } BufferVector buffers = {null_bitmap_, data}; - return PushArray(std::make_shared<internal::ArrayData>( - type_, length_, std::move(buffers), 
null_count, 0)); + return PushArray( + std::make_shared<ArrayData>(type_, length_, std::move(buffers), null_count, 0)); } template <typename T> @@ -448,13 +454,13 @@ void CopyStrided<PyObject*>(PyObject** input_data, int64_t length, int64_t strid template <typename ArrowType> inline Status PandasConverter::ConvertData(std::shared_ptr<Buffer>* data) { - using traits = arrow_traits<ArrowType::type_id>; + using traits = internal::arrow_traits<ArrowType::type_id>; using T = typename traits::T; // Handle LONGLONG->INT64 and other fun things int type_num_compat = cast_npy_type_compat(PyArray_DESCR(arr_)->type_num); - if (numpy_type_size(traits::npy_type) != numpy_type_size(type_num_compat)) { + if (NumPyTypeSize(traits::npy_type) != NumPyTypeSize(type_num_compat)) { return Status::NotImplemented("NumPy type casts not yet implemented"); } @@ -925,7 +931,7 @@ Status LoopPySequence(PyObject* sequence, T func) { template <int ITEM_TYPE, typename ArrowType> inline Status PandasConverter::ConvertTypedLists(const std::shared_ptr<DataType>& type, ListBuilder* builder, PyObject* list) { - typedef npy_traits<ITEM_TYPE> traits; + typedef internal::npy_traits<ITEM_TYPE> traits; typedef typename traits::value_type T; typedef typename traits::BuilderClass BuilderT; http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/python/type_traits.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/type_traits.h b/cpp/src/arrow/python/type_traits.h index b6761ae..2cbbdf4 100644 --- a/cpp/src/arrow/python/type_traits.h +++ b/cpp/src/arrow/python/type_traits.h @@ -30,6 +30,7 @@ namespace arrow { namespace py { +namespace internal { template <int TYPE> struct npy_traits {}; @@ -227,7 +228,7 @@ struct arrow_traits<Type::BINARY> { static constexpr bool supports_nulls = true; }; -static inline int numpy_type_size(int npy_type) { +static inline int NumPyTypeSize(int npy_type) { switch (npy_type) { case NPY_BOOL: return 1; @@ 
-272,5 +273,6 @@ static inline int numpy_type_size(int npy_type) { return -1; } +} // namespace internal } // namespace py } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/table.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc index a0a2507..665ce2d 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -387,7 +387,7 @@ Status Table::RemoveColumn(int i, std::shared_ptr<Table>* out) const { std::shared_ptr<Schema> new_schema; RETURN_NOT_OK(schema_->RemoveField(i, &new_schema)); - *out = std::make_shared<Table>(new_schema, DeleteVectorElement(columns_, i)); + *out = std::make_shared<Table>(new_schema, internal::DeleteVectorElement(columns_, i)); return Status::OK(); } @@ -411,7 +411,8 @@ Status Table::AddColumn(int i, const std::shared_ptr<Column>& col, std::shared_ptr<Schema> new_schema; RETURN_NOT_OK(schema_->AddField(i, col->field(), &new_schema)); - *out = std::make_shared<Table>(new_schema, AddVectorElement(columns_, i, col)); + *out = + std::make_shared<Table>(new_schema, internal::AddVectorElement(columns_, i, col)); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/test-common.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/test-common.h b/cpp/src/arrow/test-common.h index b3e5af8..4ce0640 100644 --- a/cpp/src/arrow/test-common.h +++ b/cpp/src/arrow/test-common.h @@ -70,7 +70,7 @@ class TestBuilder : public ::testing::Test { public: void SetUp() { pool_ = default_memory_pool(); - type_ = TypePtr(new UInt8Type()); + type_ = uint8(); builder_.reset(new UInt8Builder(pool_)); builder_nn_.reset(new UInt8Builder(pool_)); } @@ -78,7 +78,7 @@ class TestBuilder : public ::testing::Test { protected: MemoryPool* pool_; - TypePtr type_; + std::shared_ptr<DataType> type_; std::unique_ptr<ArrayBuilder> builder_; 
std::unique_ptr<ArrayBuilder> builder_nn_; }; http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/type.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 586da2d..b8489d4 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -293,7 +293,8 @@ Status Schema::AddField(int i, const std::shared_ptr<Field>& field, DCHECK_GE(i, 0); DCHECK_LE(i, this->num_fields()); - *out = std::make_shared<Schema>(AddVectorElement(fields_, i, field), metadata_); + *out = + std::make_shared<Schema>(internal::AddVectorElement(fields_, i, field), metadata_); return Status::OK(); } @@ -316,7 +317,7 @@ Status Schema::RemoveField(int i, std::shared_ptr<Schema>* out) const { DCHECK_GE(i, 0); DCHECK_LT(i, this->num_fields()); - *out = std::make_shared<Schema>(DeleteVectorElement(fields_, i), metadata_); + *out = std::make_shared<Schema>(internal::DeleteVectorElement(fields_, i), metadata_); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/type.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index e0df722..45d97fd 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -162,7 +162,8 @@ class ARROW_EXPORT DataType { DISALLOW_COPY_AND_ASSIGN(DataType); }; -typedef std::shared_ptr<DataType> TypePtr; +// TODO(wesm): Remove this from parquet-cpp +using TypePtr = std::shared_ptr<DataType>; class ARROW_EXPORT FixedWidthType : public DataType { public: @@ -241,7 +242,7 @@ class ARROW_EXPORT Field { std::shared_ptr<const KeyValueMetadata> metadata_; }; -typedef std::shared_ptr<Field> FieldPtr; +namespace detail { template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE> class ARROW_EXPORT CTypeImpl : public BASE { @@ -260,6 +261,13 @@ class ARROW_EXPORT CTypeImpl : public BASE { std::string ToString() const override { return 
std::string(DERIVED::name()); } }; +template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE> +class IntegerTypeImpl : public detail::CTypeImpl<DERIVED, Integer, TYPE_ID, C_TYPE> { + bool is_signed() const override { return std::is_signed<C_TYPE>::value; } +}; + +} // namespace detail + class ARROW_EXPORT NullType : public DataType, public NoExtraMeta { public: static constexpr Type::type type_id = Type::NA; @@ -274,11 +282,6 @@ class ARROW_EXPORT NullType : public DataType, public NoExtraMeta { std::vector<BufferDescr> GetBufferLayout() const override; }; -template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE> -class IntegerTypeImpl : public CTypeImpl<DERIVED, Integer, TYPE_ID, C_TYPE> { - bool is_signed() const override { return std::is_signed<C_TYPE>::value; } -}; - class ARROW_EXPORT BooleanType : public FixedWidthType, public NoExtraMeta { public: static constexpr Type::type type_id = Type::BOOL; @@ -292,65 +295,70 @@ class ARROW_EXPORT BooleanType : public FixedWidthType, public NoExtraMeta { static std::string name() { return "bool"; } }; -class ARROW_EXPORT UInt8Type : public IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> { +class ARROW_EXPORT UInt8Type + : public detail::IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> { public: static std::string name() { return "uint8"; } }; -class ARROW_EXPORT Int8Type : public IntegerTypeImpl<Int8Type, Type::INT8, int8_t> { +class ARROW_EXPORT Int8Type + : public detail::IntegerTypeImpl<Int8Type, Type::INT8, int8_t> { public: static std::string name() { return "int8"; } }; class ARROW_EXPORT UInt16Type - : public IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> { + : public detail::IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> { public: static std::string name() { return "uint16"; } }; -class ARROW_EXPORT Int16Type : public IntegerTypeImpl<Int16Type, Type::INT16, int16_t> { +class ARROW_EXPORT Int16Type + : public detail::IntegerTypeImpl<Int16Type, Type::INT16, int16_t> { public: static 
std::string name() { return "int16"; } }; class ARROW_EXPORT UInt32Type - : public IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> { + : public detail::IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> { public: static std::string name() { return "uint32"; } }; -class ARROW_EXPORT Int32Type : public IntegerTypeImpl<Int32Type, Type::INT32, int32_t> { +class ARROW_EXPORT Int32Type + : public detail::IntegerTypeImpl<Int32Type, Type::INT32, int32_t> { public: static std::string name() { return "int32"; } }; class ARROW_EXPORT UInt64Type - : public IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> { + : public detail::IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> { public: static std::string name() { return "uint64"; } }; -class ARROW_EXPORT Int64Type : public IntegerTypeImpl<Int64Type, Type::INT64, int64_t> { +class ARROW_EXPORT Int64Type + : public detail::IntegerTypeImpl<Int64Type, Type::INT64, int64_t> { public: static std::string name() { return "int64"; } }; class ARROW_EXPORT HalfFloatType - : public CTypeImpl<HalfFloatType, FloatingPoint, Type::HALF_FLOAT, uint16_t> { + : public detail::CTypeImpl<HalfFloatType, FloatingPoint, Type::HALF_FLOAT, uint16_t> { public: Precision precision() const override; static std::string name() { return "halffloat"; } }; class ARROW_EXPORT FloatType - : public CTypeImpl<FloatType, FloatingPoint, Type::FLOAT, float> { + : public detail::CTypeImpl<FloatType, FloatingPoint, Type::FLOAT, float> { public: Precision precision() const override; static std::string name() { return "float"; } }; class ARROW_EXPORT DoubleType - : public CTypeImpl<DoubleType, FloatingPoint, Type::DOUBLE, double> { + : public detail::CTypeImpl<DoubleType, FloatingPoint, Type::DOUBLE, double> { public: Precision precision() const override; static std::string name() { return "double"; } http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/type_traits.h ---------------------------------------------------------------------- diff 
--git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 8be67b2..973b0e1 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -296,6 +296,8 @@ struct TypeTraits<DictionaryType> { constexpr static bool is_parameter_free = false; }; +namespace detail { + // Not all type classes have a c_type template <typename T> struct as_void { @@ -319,11 +321,13 @@ GET_ATTR(TypeClass, void); #undef GET_ATTR -#define PRIMITIVE_TRAITS(T) \ - using TypeClass = \ - typename std::conditional<std::is_base_of<DataType, T>::value, T, \ - typename GetAttr_TypeClass<T>::type>::type; \ - using c_type = typename GetAttr_c_type<TypeClass>::type; +} // namespace detail + +#define PRIMITIVE_TRAITS(T) \ + using TypeClass = \ + typename std::conditional<std::is_base_of<DataType, T>::value, T, \ + typename detail::GetAttr_TypeClass<T>::type>::type; \ + using c_type = typename detail::GetAttr_c_type<TypeClass>::type; template <typename T> struct IsUnsignedInt { http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/util/bit-stream-utils.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bit-stream-utils.h b/cpp/src/arrow/util/bit-stream-utils.h index 318f5ba..d312fef 100644 --- a/cpp/src/arrow/util/bit-stream-utils.h +++ b/cpp/src/arrow/util/bit-stream-utils.h @@ -227,6 +227,8 @@ inline bool BitWriter::PutVlqInt(uint32_t v) { return result; } +namespace detail { + template <typename T> inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer, int* bit_offset, int* byte_offset, uint64_t* buffered_values) { @@ -264,6 +266,8 @@ inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer, } } +} // namespace detail + template <typename T> inline bool BitReader::GetValue(int num_bits, T* v) { return GetBatch(num_bits, v, 1) == 1; @@ -291,15 +295,15 @@ inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) { int i = 0; if (UNLIKELY(bit_offset 
!= 0)) { for (; i < batch_size && bit_offset != 0; ++i) { - GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset, - &buffered_values); + detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset, + &buffered_values); } } if (sizeof(T) == 4) { int num_unpacked = - unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset), - reinterpret_cast<uint32_t*>(v + i), batch_size - i, num_bits); + internal::unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset), + reinterpret_cast<uint32_t*>(v + i), batch_size - i, num_bits); i += num_unpacked; byte_offset += num_unpacked * num_bits / 8; } else { @@ -307,8 +311,9 @@ inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) { uint32_t unpack_buffer[buffer_size]; while (i < batch_size) { int unpack_size = std::min(buffer_size, batch_size - i); - int num_unpacked = unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset), - unpack_buffer, unpack_size, num_bits); + int num_unpacked = + internal::unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset), + unpack_buffer, unpack_size, num_bits); if (num_unpacked == 0) { break; } @@ -335,8 +340,8 @@ inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) { } for (; i < batch_size; ++i) { - GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset, - &buffered_values); + detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset, + &buffered_values); } bit_offset_ = bit_offset; http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/util/bit-util.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h index f036763..fc360ba 100644 --- a/cpp/src/arrow/util/bit-util.h +++ b/cpp/src/arrow/util/bit-util.h @@ -66,6 +66,8 @@ namespace arrow { // // We add a partial stub implementation here +namespace detail { + template <typename T> struct make_unsigned {}; 
@@ -89,6 +91,8 @@ struct make_unsigned<int64_t> { typedef uint64_t type; }; +} // namespace detail + class Buffer; class MemoryPool; class MutableBuffer; @@ -253,7 +257,7 @@ static inline int Popcount(uint64_t x) { template <typename T> static inline int PopcountSigned(T v) { // Converting to same-width unsigned then extending preserves the bit pattern. - return BitUtil::Popcount(static_cast<typename make_unsigned<T>::type>(v)); + return BitUtil::Popcount(static_cast<typename detail::make_unsigned<T>::type>(v)); } /// Returns the 'num_bits' least-significant bits of 'v'. @@ -364,7 +368,7 @@ static inline uint16_t FromBigEndian(uint16_t val) { return val; } template <typename T> static T ShiftRightLogical(T v, int shift) { // Conversion to unsigned ensures most significant bits always filled with 0's - return static_cast<typename make_unsigned<T>::type>(v) >> shift; + return static_cast<typename detail::make_unsigned<T>::type>(v) >> shift; } void FillBitsFromBytes(const std::vector<uint8_t>& bytes, uint8_t* bits); http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/util/bpacking.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bpacking.h b/cpp/src/arrow/util/bpacking.h index 4d25de0..14258cf 100644 --- a/cpp/src/arrow/util/bpacking.h +++ b/cpp/src/arrow/util/bpacking.h @@ -20,12 +20,9 @@ // https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp // The original copyright notice follows. -/** -* -* This code is released under the -* Apache License Version 2.0 http://www.apache.org/licenses/. -* (c) Daniel Lemire 2013 -*/ +// This code is released under the +// Apache License Version 2.0 http://www.apache.org/licenses/. 
+// (c) Daniel Lemire 2013 #ifndef ARROW_UTIL_BPACKING_H #define ARROW_UTIL_BPACKING_H @@ -33,6 +30,7 @@ #include "arrow/util/logging.h" namespace arrow { +namespace internal { inline const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) { *out = ((*in) >> 0) & 1; @@ -3304,6 +3302,7 @@ inline int unpack32(const uint32_t* in, uint32_t* out, int batch_size, int num_b return batch_size; } -}; // namespace arrow +} // namespace internal +} // namespace arrow #endif // ARROW_UTIL_BPACKING_H http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/util/memory.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/memory.h b/cpp/src/arrow/util/memory.h index fce9e19..fef6b31 100644 --- a/cpp/src/arrow/util/memory.h +++ b/cpp/src/arrow/util/memory.h @@ -22,6 +22,7 @@ #include <vector> namespace arrow { +namespace internal { uint8_t* pointer_logical_and(const uint8_t* address, uintptr_t bits) { uintptr_t value = reinterpret_cast<uintptr_t>(address); @@ -66,6 +67,7 @@ void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes, } } +} // namespace internal } // namespace arrow #endif // ARROW_UTIL_MEMORY_H http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/util/random.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/random.h b/cpp/src/arrow/util/random.h index ec48d5d..2e05a73 100644 --- a/cpp/src/arrow/util/random.h +++ b/cpp/src/arrow/util/random.h @@ -12,13 +12,14 @@ #include <cmath> namespace arrow { - -namespace random_internal { +namespace internal { +namespace random { static const uint32_t M = 2147483647L; // 2^31-1 const double kTwoPi = 6.283185307179586476925286; -} // namespace random_internal +} // namespace random +} // namespace internal // A very simple random number generator. 
Not especially good at // generating truly random bits, but good enough for our needs in this @@ -27,7 +28,7 @@ class Random { public: explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { // Avoid bad seeds. - if (seed_ == 0 || seed_ == random_internal::M) { + if (seed_ == 0 || seed_ == internal::random::M) { seed_ = 1; } } @@ -46,12 +47,12 @@ class Random { uint64_t product = seed_ * A; // Compute (product % M) using the fact that ((x << 31) % M) == x. - seed_ = static_cast<uint32_t>((product >> 31) + (product & random_internal::M)); + seed_ = static_cast<uint32_t>((product >> 31) + (product & internal::random::M)); // The first reduction may overflow by 1 bit, so we may need to // repeat. mod == M is not possible; using > allows the faster // sign-bit-based test. - if (seed_ > random_internal::M) { - seed_ -= random_internal::M; + if (seed_ > internal::random::M) { + seed_ -= internal::random::M; } return seed_; } @@ -99,16 +100,16 @@ class Random { // Adapted from WebRTC source code at: // webrtc/trunk/modules/video_coding/main/test/test_util.cc double Normal(double mean, double std_dev) { - double uniform1 = (Next() + 1.0) / (random_internal::M + 1.0); - double uniform2 = (Next() + 1.0) / (random_internal::M + 1.0); + double uniform1 = (Next() + 1.0) / (internal::random::M + 1.0); + double uniform2 = (Next() + 1.0) / (internal::random::M + 1.0); return (mean + std_dev * sqrt(-2 * ::log(uniform1)) * - cos(random_internal::kTwoPi * uniform2)); + cos(internal::random::kTwoPi * uniform2)); } // Return a random number between 0.0 and 1.0 inclusive. 
double NextDoubleFraction() { - return Next() / static_cast<double>(random_internal::M + 1.0); + return Next() / static_cast<double>(internal::random::M + 1.0); } private: http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/util/stl-util-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/stl-util-test.cc b/cpp/src/arrow/util/stl-util-test.cc index 526520e..629eb24 100644 --- a/cpp/src/arrow/util/stl-util-test.cc +++ b/cpp/src/arrow/util/stl-util-test.cc @@ -25,6 +25,7 @@ #include "arrow/test-util.h" namespace arrow { +namespace internal { TEST(StlUtilTest, VectorAddRemoveTest) { std::vector<int> values; @@ -57,4 +58,5 @@ TEST(StlUtilTest, VectorAddRemoveTest) { EXPECT_TRUE(result3.empty()); } +} // namespace internal } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/util/stl.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/stl.h b/cpp/src/arrow/util/stl.h index 4b8916f..27c1778 100644 --- a/cpp/src/arrow/util/stl.h +++ b/cpp/src/arrow/util/stl.h @@ -23,6 +23,7 @@ #include "arrow/util/logging.h" namespace arrow { +namespace internal { template <typename T> inline std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index) { @@ -55,6 +56,7 @@ inline std::vector<T> AddVectorElement(const std::vector<T>& values, size_t inde return out; } +} // namespace internal } // namespace arrow #endif // ARROW_UTIL_STL_H http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/visitor.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/visitor.cc b/cpp/src/arrow/visitor.cc index 1175789..203ed6d 100644 --- a/cpp/src/arrow/visitor.cc +++ b/cpp/src/arrow/visitor.cc @@ -59,6 +59,8 @@ Status ArrayVisitor::Visit(const DecimalArray& array) { return Status::NotImplemented("decimal"); } +#undef ARRAY_VISITOR_DEFAULT + // 
---------------------------------------------------------------------- // Default implementations of TypeVisitor methods @@ -95,4 +97,6 @@ TYPE_VISITOR_DEFAULT(StructType); TYPE_VISITOR_DEFAULT(UnionType); TYPE_VISITOR_DEFAULT(DictionaryType); +#undef TYPE_VISITOR_DEFAULT + } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/b8754eba/cpp/src/arrow/visitor_inline.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/visitor_inline.h b/cpp/src/arrow/visitor_inline.h index 7478950..54f9e88 100644 --- a/cpp/src/arrow/visitor_inline.h +++ b/cpp/src/arrow/visitor_inline.h @@ -65,6 +65,8 @@ inline Status VisitTypeInline(const DataType& type, VISITOR* visitor) { return Status::NotImplemented("Type not implemented"); } +#undef TYPE_VISIT_INLINE + #define ARRAY_VISIT_INLINE(TYPE_CLASS) \ case TYPE_CLASS::type_id: \ return visitor->Visit( \