Repository: arrow Updated Branches: refs/heads/master 5ad498833 -> 60b5832e4
ARROW-686: [C++] Account for time metadata changes, add Time32 and Time64 types This also has a little visitor refactoring Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #432 from wesm/ARROW-686 and squashes the following commits: 300c7f2 [Wes McKinney] Fix glib for time32/64 changes be4c976 [Wes McKinney] Remove JSON time todo 504059a [Wes McKinney] Remove copy ctors to fix MSVC linker error ae574ce [Wes McKinney] Some cleaning cf9783c [Wes McKinney] Add new time types to Python bindings 95f5a05 [Wes McKinney] Implement Time32 and Time64 types, IPC roundtrip Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/60b5832e Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/60b5832e Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/60b5832e Branch: refs/heads/master Commit: 60b5832e4c75457e98b5caf7a8622c2201de2cd5 Parents: 5ad4988 Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Fri Mar 24 18:27:16 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Fri Mar 24 18:27:16 2017 -0400 ---------------------------------------------------------------------- c_glib/arrow-glib/type.cpp | 6 +- c_glib/arrow-glib/type.h | 6 +- cpp/src/arrow/array-decimal-test.cc | 6 -- cpp/src/arrow/array-primitive-test.cc | 3 - cpp/src/arrow/array.cc | 3 +- cpp/src/arrow/array.h | 5 +- cpp/src/arrow/builder.cc | 28 +++-- cpp/src/arrow/builder.h | 4 +- cpp/src/arrow/compare.cc | 60 +++++++---- cpp/src/arrow/ipc/feather-test.cc | 2 +- cpp/src/arrow/ipc/feather.cc | 14 ++- cpp/src/arrow/ipc/ipc-json-test.cc | 8 +- cpp/src/arrow/ipc/json-internal.cc | 47 +++++++-- cpp/src/arrow/ipc/metadata.cc | 20 +++- cpp/src/arrow/ipc/test-common.h | 26 +++-- cpp/src/arrow/ipc/writer.cc | 3 +- cpp/src/arrow/pretty_print.cc | 164 ++++++++++++----------------- cpp/src/arrow/type-test.cc | 36 ++++--- cpp/src/arrow/type.cc | 41 ++++++-- cpp/src/arrow/type.h | 63 +++++++---- cpp/src/arrow/type_fwd.h | 11 
+- cpp/src/arrow/type_traits.h | 49 ++++++++- cpp/src/arrow/visitor.cc | 6 +- cpp/src/arrow/visitor.h | 6 +- cpp/src/arrow/visitor_inline.h | 41 +++++++- python/pyarrow/array.pyx | 10 +- python/pyarrow/includes/libarrow.pxd | 15 ++- 27 files changed, 446 insertions(+), 237 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/c_glib/arrow-glib/type.cpp ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/type.cpp b/c_glib/arrow-glib/type.cpp index 2e59647..8adbaa9 100644 --- a/c_glib/arrow-glib/type.cpp +++ b/c_glib/arrow-glib/type.cpp @@ -72,8 +72,10 @@ garrow_type_from_raw(arrow::Type::type type) return GARROW_TYPE_DATE64; case arrow::Type::type::TIMESTAMP: return GARROW_TYPE_TIMESTAMP; - case arrow::Type::type::TIME: - return GARROW_TYPE_TIME; + case arrow::Type::type::TIME32: + return GARROW_TYPE_TIME32; + case arrow::Type::type::TIME64: + return GARROW_TYPE_TIME64; case arrow::Type::type::INTERVAL: return GARROW_TYPE_INTERVAL; case arrow::Type::type::DECIMAL: http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/c_glib/arrow-glib/type.h ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/type.h b/c_glib/arrow-glib/type.h index cd6137c..e171aa3 100644 --- a/c_glib/arrow-glib/type.h +++ b/c_glib/arrow-glib/type.h @@ -44,7 +44,8 @@ G_BEGIN_DECLS * @GARROW_TYPE_DATE64: int64 milliseconds since the UNIX epoch. * @GARROW_TYPE_TIMESTAMP: Exact timestamp encoded with int64 since UNIX epoch. * Default unit millisecond. - * @GARROW_TYPE_TIME: Exact time encoded with int64, default unit millisecond. + * @GARROW_TYPE_TIME32: Exact time encoded with int32, supporting seconds or milliseconds + * @GARROW_TYPE_TIME64: Exact time encoded with int64, supporting micro- or nanoseconds * @GARROW_TYPE_INTERVAL: YEAR_MONTH or DAY_TIME interval in SQL style. 
* @GARROW_TYPE_DECIMAL: Precision- and scale-based decimal * type. Storage type depends on the parameters. @@ -74,7 +75,8 @@ typedef enum { GARROW_TYPE_DATE32, GARROW_TYPE_DATE64, GARROW_TYPE_TIMESTAMP, - GARROW_TYPE_TIME, + GARROW_TYPE_TIME32, + GARROW_TYPE_TIME64, GARROW_TYPE_INTERVAL, GARROW_TYPE_DECIMAL, GARROW_TYPE_LIST, http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/array-decimal-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/array-decimal-test.cc b/cpp/src/arrow/array-decimal-test.cc index 9e00fd9..b64023b 100644 --- a/cpp/src/arrow/array-decimal-test.cc +++ b/cpp/src/arrow/array-decimal-test.cc @@ -29,12 +29,6 @@ TEST(TypesTest, TestDecimalType) { ASSERT_EQ(t1.scale, 4); ASSERT_EQ(t1.ToString(), std::string("decimal(8, 4)")); - - // Test copy constructor - DecimalType t2 = t1; - ASSERT_EQ(t2.type, Type::DECIMAL); - ASSERT_EQ(t2.precision, 8); - ASSERT_EQ(t2.scale, 4); } } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/array-primitive-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/array-primitive-test.cc b/cpp/src/arrow/array-primitive-test.cc index 6863e58..fe60170 100644 --- a/cpp/src/arrow/array-primitive-test.cc +++ b/cpp/src/arrow/array-primitive-test.cc @@ -47,9 +47,6 @@ class Array; \ ASSERT_EQ(tp.type, Type::ENUM); \ ASSERT_EQ(tp.ToString(), string(NAME)); \ - \ - KLASS tp_copy = tp; \ - ASSERT_EQ(tp_copy.type, Type::ENUM); \ } PRIMITIVE_TEST(Int8Type, INT8, "int8"); http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/array.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index 20b732a..f1c8bd4 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -483,7 +483,8 @@ template class NumericArray<Int64Type>; template class NumericArray<TimestampType>; 
template class NumericArray<Date32Type>; template class NumericArray<Date64Type>; -template class NumericArray<TimeType>; +template class NumericArray<Time32Type>; +template class NumericArray<Time64Type>; template class NumericArray<HalfFloatType>; template class NumericArray<FloatType>; template class NumericArray<DoubleType>; http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/array.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index 2a072db..c73b7a8 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -527,10 +527,11 @@ extern template class ARROW_EXPORT NumericArray<UInt64Type>; extern template class ARROW_EXPORT NumericArray<HalfFloatType>; extern template class ARROW_EXPORT NumericArray<FloatType>; extern template class ARROW_EXPORT NumericArray<DoubleType>; -extern template class ARROW_EXPORT NumericArray<TimestampType>; extern template class ARROW_EXPORT NumericArray<Date32Type>; extern template class ARROW_EXPORT NumericArray<Date64Type>; -extern template class ARROW_EXPORT NumericArray<TimeType>; +extern template class ARROW_EXPORT NumericArray<Time32Type>; +extern template class ARROW_EXPORT NumericArray<Time64Type>; +extern template class ARROW_EXPORT NumericArray<TimestampType>; #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/builder.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 483d6f0..52a785d 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -240,8 +240,9 @@ template class PrimitiveBuilder<Int32Type>; template class PrimitiveBuilder<Int64Type>; template class PrimitiveBuilder<Date32Type>; template class PrimitiveBuilder<Date64Type>; +template class PrimitiveBuilder<Time32Type>; +template class 
PrimitiveBuilder<Time64Type>; template class PrimitiveBuilder<TimestampType>; -template class PrimitiveBuilder<TimeType>; template class PrimitiveBuilder<HalfFloatType>; template class PrimitiveBuilder<FloatType>; template class PrimitiveBuilder<DoubleType>; @@ -511,9 +512,9 @@ std::shared_ptr<ArrayBuilder> StructBuilder::field_builder(int pos) const { // ---------------------------------------------------------------------- // Helper functions -#define BUILDER_CASE(ENUM, BuilderType) \ - case Type::ENUM: \ - out->reset(new BuilderType(pool)); \ +#define BUILDER_CASE(ENUM, BuilderType) \ + case Type::ENUM: \ + out->reset(new BuilderType(pool, type)); \ return Status::OK(); // Initially looked at doing this with vtables, but shared pointers makes it @@ -533,17 +534,14 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type, BUILDER_CASE(INT64, Int64Builder); BUILDER_CASE(DATE32, Date32Builder); BUILDER_CASE(DATE64, Date64Builder); - case Type::TIMESTAMP: - out->reset(new TimestampBuilder(pool, type)); - return Status::OK(); - case Type::TIME: - out->reset(new TimeBuilder(pool, type)); - return Status::OK(); - BUILDER_CASE(BOOL, BooleanBuilder); - BUILDER_CASE(FLOAT, FloatBuilder); - BUILDER_CASE(DOUBLE, DoubleBuilder); - BUILDER_CASE(STRING, StringBuilder); - BUILDER_CASE(BINARY, BinaryBuilder); + BUILDER_CASE(TIME32, Time32Builder); + BUILDER_CASE(TIME64, Time64Builder); + BUILDER_CASE(TIMESTAMP, TimestampBuilder); + BUILDER_CASE(BOOL, BooleanBuilder); + BUILDER_CASE(FLOAT, FloatBuilder); + BUILDER_CASE(DOUBLE, DoubleBuilder); + BUILDER_CASE(STRING, StringBuilder); + BUILDER_CASE(BINARY, BinaryBuilder); case Type::LIST: { std::shared_ptr<ArrayBuilder> value_builder; std::shared_ptr<DataType> value_type = http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/builder.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 7cefa64..bd957b3 
100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -231,7 +231,8 @@ using Int16Builder = NumericBuilder<Int16Type>; using Int32Builder = NumericBuilder<Int32Type>; using Int64Builder = NumericBuilder<Int64Type>; using TimestampBuilder = NumericBuilder<TimestampType>; -using TimeBuilder = NumericBuilder<TimeType>; +using Time32Builder = NumericBuilder<Time32Type>; +using Time64Builder = NumericBuilder<Time64Type>; using Date32Builder = NumericBuilder<Date32Type>; using Date64Builder = NumericBuilder<Date64Type>; @@ -378,6 +379,7 @@ class ARROW_EXPORT BinaryBuilder : public ListBuilder { // String builder class ARROW_EXPORT StringBuilder : public BinaryBuilder { public: + using BinaryBuilder::BinaryBuilder; explicit StringBuilder(MemoryPool* pool); using BinaryBuilder::Append; http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/compare.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index 3e6ecef..13511cf 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -29,6 +29,7 @@ #include "arrow/type_traits.h" #include "arrow/util/bit-util.h" #include "arrow/util/logging.h" +#include "arrow/visitor_inline.h" namespace arrow { @@ -177,7 +178,13 @@ class RangeEqualsVisitor : public ArrayVisitor { return CompareValues<Date64Array>(left); } - Status Visit(const TimeArray& left) override { return CompareValues<TimeArray>(left); } + Status Visit(const Time32Array& left) override { + return CompareValues<Time32Array>(left); + } + + Status Visit(const Time64Array& left) override { + return CompareValues<Time64Array>(left); + } Status Visit(const TimestampArray& left) override { return CompareValues<TimestampArray>(left); @@ -415,7 +422,9 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor { Status Visit(const Date64Array& left) override { return ComparePrimitive(left); } - Status Visit(const TimeArray& left) override { return 
ComparePrimitive(left); } + Status Visit(const Time32Array& left) override { return ComparePrimitive(left); } + + Status Visit(const Time64Array& left) override { return ComparePrimitive(left); } Status Visit(const TimestampArray& left) override { return ComparePrimitive(left); } @@ -628,7 +637,7 @@ Status ArrayApproxEquals(const Array& left, const Array& right, bool* are_equal) // ---------------------------------------------------------------------- // Implement TypeEquals -class TypeEqualsVisitor : public TypeVisitor { +class TypeEqualsVisitor { public: explicit TypeEqualsVisitor(const DataType& right) : right_(right), result_(false) {} @@ -648,29 +657,44 @@ class TypeEqualsVisitor : public TypeVisitor { return Status::OK(); } - Status Visit(const TimeType& left) override { - const auto& right = static_cast<const TimeType&>(right_); + template <typename T> + typename std::enable_if<std::is_base_of<NoExtraMeta, T>::value || + std::is_base_of<PrimitiveCType, T>::value, + Status>::type + Visit(const T& type) { + result_ = true; + return Status::OK(); + } + + Status Visit(const Time32Type& left) { + const auto& right = static_cast<const Time32Type&>(right_); result_ = left.unit == right.unit; return Status::OK(); } - Status Visit(const TimestampType& left) override { + Status Visit(const Time64Type& left) { + const auto& right = static_cast<const Time64Type&>(right_); + result_ = left.unit == right.unit; + return Status::OK(); + } + + Status Visit(const TimestampType& left) { const auto& right = static_cast<const TimestampType&>(right_); result_ = left.unit == right.unit && left.timezone == right.timezone; return Status::OK(); } - Status Visit(const FixedWidthBinaryType& left) override { + Status Visit(const FixedWidthBinaryType& left) { const auto& right = static_cast<const FixedWidthBinaryType&>(right_); result_ = left.byte_width() == right.byte_width(); return Status::OK(); } - Status Visit(const ListType& left) override { return VisitChildren(left); } + Status 
Visit(const ListType& left) { return VisitChildren(left); } - Status Visit(const StructType& left) override { return VisitChildren(left); } + Status Visit(const StructType& left) { return VisitChildren(left); } - Status Visit(const UnionType& left) override { + Status Visit(const UnionType& left) { const auto& right = static_cast<const UnionType&>(right_); if (left.mode != right.mode || left.type_codes.size() != right.type_codes.size()) { @@ -691,7 +715,7 @@ class TypeEqualsVisitor : public TypeVisitor { return Status::OK(); } - Status Visit(const DictionaryType& left) override { + Status Visit(const DictionaryType& left) { const auto& right = static_cast<const DictionaryType&>(right_); result_ = left.index_type()->Equals(right.index_type()) && left.dictionary()->Equals(right.dictionary()); @@ -713,18 +737,8 @@ Status TypeEquals(const DataType& left, const DataType& right, bool* are_equal) *are_equal = false; } else { TypeEqualsVisitor visitor(right); - Status s = left.Accept(&visitor); - - // We do not implement any type visitors where there is no additional - // metadata to compare. 
- if (s.IsNotImplemented()) { - // Not implemented means there is no additional metadata to compare - *are_equal = true; - } else if (!s.ok()) { - return s; - } else { - *are_equal = visitor.result(); - } + RETURN_NOT_OK(VisitTypeInline(left, &visitor)); + *are_equal = visitor.result(); } return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/ipc/feather-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/feather-test.cc b/cpp/src/arrow/ipc/feather-test.cc index 2513887..e181f69 100644 --- a/cpp/src/arrow/ipc/feather-test.cc +++ b/cpp/src/arrow/ipc/feather-test.cc @@ -353,7 +353,7 @@ TEST_F(TestTableWriter, CategoryRoundtrip) { TEST_F(TestTableWriter, TimeTypes) { std::vector<bool> is_valid = {true, true, true, false, true, true, true}; auto f0 = field("f0", date32()); - auto f1 = field("f1", time(TimeUnit::MILLI)); + auto f1 = field("f1", time32(TimeUnit::MILLI)); auto f2 = field("f2", timestamp(TimeUnit::NANO)); auto f3 = field("f3", timestamp(TimeUnit::SECOND, "US/Los_Angeles")); std::shared_ptr<Schema> schema(new Schema({f0, f1, f2, f3})); http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/ipc/feather.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc index 0dd9a81..000bba9 100644 --- a/cpp/src/arrow/ipc/feather.cc +++ b/cpp/src/arrow/ipc/feather.cc @@ -294,7 +294,7 @@ class TableReader::TableReaderImpl { break; case fbs::TypeMetadata_TimeMetadata: { auto meta = static_cast<const fbs::TimeMetadata*>(metadata); - *out = std::make_shared<TimeType>(FromFlatbufferEnum(meta->unit())); + *out = time32(FromFlatbufferEnum(meta->unit())); } break; default: switch (values->type()) { @@ -476,7 +476,9 @@ fbs::Type ToFlatbufferType(Type::type type) { return fbs::Type_DATE; case Type::TIMESTAMP: return fbs::Type_TIMESTAMP; - case Type::TIME: + case Type::TIME32: 
+ return fbs::Type_TIME; + case Type::TIME64: return fbs::Type_TIME; case Type::DICTIONARY: return fbs::Type_CATEGORY; @@ -646,13 +648,17 @@ class TableWriter::TableWriterImpl : public ArrayVisitor { return Status::OK(); } - Status Visit(const TimeArray& values) override { + Status Visit(const Time32Array& values) override { RETURN_NOT_OK(WritePrimitiveValues(values)); - auto unit = static_cast<const TimeType&>(*values.type()).unit; + auto unit = static_cast<const Time32Type&>(*values.type()).unit; current_column_->SetTime(unit); return Status::OK(); } + Status Visit(const Time64Array& values) override { + return Status::NotImplemented("time64"); + } + Status Append(const std::string& name, const Array& values) { current_column_ = metadata_.AddColumn(name); RETURN_NOT_OK(values.Accept(this)); http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/ipc/ipc-json-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc b/cpp/src/arrow/ipc/ipc-json-test.cc index fd35182..e943ef1 100644 --- a/cpp/src/arrow/ipc/ipc-json-test.cc +++ b/cpp/src/arrow/ipc/ipc-json-test.cc @@ -52,7 +52,9 @@ void TestSchemaRoundTrip(const Schema& schema) { std::shared_ptr<Schema> out; ASSERT_OK(ReadJsonSchema(d, &out)); - ASSERT_TRUE(schema.Equals(out)); + if (!schema.Equals(out)) { + FAIL() << "In schema: " << schema.ToString() << "\nOut schema: " << out->ToString(); + } } void TestArrayRoundTrip(const Array& array) { @@ -105,8 +107,8 @@ TEST(TestJsonSchemaWriter, FlatTypes) { field("f10", utf8()), field("f11", binary()), field("f12", list(int32())), field("f13", struct_({field("s1", int32()), field("s2", utf8())})), field("f15", date64()), field("f16", timestamp(TimeUnit::NANO)), - field("f17", time(TimeUnit::MICRO)), - field("f18", union_({field("u1", int8()), field("u2", time(TimeUnit::MILLI))}, + field("f17", time64(TimeUnit::MICRO)), + field("f18", union_({field("u1", int8()), field("u2", 
time32(TimeUnit::MILLI))}, {0, 1}, UnionMode::DENSE))}; Schema schema(fields); http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/ipc/json-internal.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index 08f0bdc..3484680 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -133,7 +133,10 @@ class JsonSchemaWriter : public TypeVisitor { } template <typename T> - typename std::enable_if<std::is_base_of<NoExtraMeta, T>::value, void>::type + typename std::enable_if<std::is_base_of<NoExtraMeta, T>::value || + std::is_base_of<ListType, T>::value || + std::is_base_of<StructType, T>::value, + void>::type WriteTypeMetadata(const T& type) {} template <typename T> @@ -167,7 +170,8 @@ class JsonSchemaWriter : public TypeVisitor { } template <typename T> - typename std::enable_if<std::is_base_of<TimeType, T>::value || + typename std::enable_if<std::is_base_of<Time32Type, T>::value || + std::is_base_of<Time64Type, T>::value || std::is_base_of<TimestampType, T>::value, void>::type WriteTypeMetadata(const T& type) { @@ -305,7 +309,9 @@ class JsonSchemaWriter : public TypeVisitor { Status Visit(const Date64Type& type) override { return WritePrimitive("date", type); } - Status Visit(const TimeType& type) override { return WritePrimitive("time", type); } + Status Visit(const Time32Type& type) override { return WritePrimitive("time", type); } + + Status Visit(const Time64Type& type) override { return WritePrimitive("time", type); } Status Visit(const TimestampType& type) override { return WritePrimitive("timestamp", type); @@ -650,15 +656,35 @@ class JsonSchemaReader { return Status::OK(); } - template <typename T> - Status GetTimeLike(const RjObject& json_type, std::shared_ptr<DataType>* type) { + Status GetTime(const RjObject& json_type, std::shared_ptr<DataType>* type) { const auto& json_unit = 
json_type.FindMember("unit"); RETURN_NOT_STRING("unit", json_unit, json_type); std::string unit_str = json_unit->value.GetString(); - TimeUnit unit; + if (unit_str == "SECOND") { + *type = time32(TimeUnit::SECOND); + } else if (unit_str == "MILLISECOND") { + *type = time32(TimeUnit::MILLI); + } else if (unit_str == "MICROSECOND") { + *type = time64(TimeUnit::MICRO); + } else if (unit_str == "NANOSECOND") { + *type = time64(TimeUnit::NANO); + } else { + std::stringstream ss; + ss << "Invalid time unit: " << unit_str; + return Status::Invalid(ss.str()); + } + return Status::OK(); + } + + Status GetTimestamp(const RjObject& json_type, std::shared_ptr<DataType>* type) { + const auto& json_unit = json_type.FindMember("unit"); + RETURN_NOT_STRING("unit", json_unit, json_type); + std::string unit_str = json_unit->value.GetString(); + + TimeUnit unit; if (unit_str == "SECOND") { unit = TimeUnit::SECOND; } else if (unit_str == "MILLISECOND") { @@ -673,7 +699,7 @@ class JsonSchemaReader { return Status::Invalid(ss.str()); } - *type = std::make_shared<T>(unit); + *type = timestamp(unit); return Status::OK(); } @@ -736,9 +762,9 @@ class JsonSchemaReader { // TODO *type = date64(); } else if (type_name == "time") { - return GetTimeLike<TimeType>(json_type, type); + return GetTime(json_type, type); } else if (type_name == "timestamp") { - return GetTimeLike<TimestampType>(json_type, type); + return GetTimestamp(json_type, type); } else if (type_name == "list") { *type = list(children[0]); } else if (type_name == "struct") { @@ -1063,7 +1089,8 @@ class JsonArrayReader { NOT_IMPLEMENTED_CASE(DATE32); NOT_IMPLEMENTED_CASE(DATE64); NOT_IMPLEMENTED_CASE(TIMESTAMP); - NOT_IMPLEMENTED_CASE(TIME); + NOT_IMPLEMENTED_CASE(TIME32); + NOT_IMPLEMENTED_CASE(TIME64); NOT_IMPLEMENTED_CASE(INTERVAL); TYPE_CASE(ListType); TYPE_CASE(StructType); http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/ipc/metadata.cc 
---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc index 14cb627..17af563 100644 --- a/cpp/src/arrow/ipc/metadata.cc +++ b/cpp/src/arrow/ipc/metadata.cc @@ -251,7 +251,16 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, } case flatbuf::Type_Time: { auto time_type = static_cast<const flatbuf::Time*>(type_data); - *out = time(FromFlatbufferUnit(time_type->unit())); + TimeUnit unit = FromFlatbufferUnit(time_type->unit()); + switch (unit) { + case TimeUnit::SECOND: + case TimeUnit::MILLI: + *out = time32(unit); + break; + default: + *out = time64(unit); + break; + } return Status::OK(); } case flatbuf::Type_Timestamp: { @@ -371,8 +380,13 @@ static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type, *out_type = flatbuf::Type_Date; *offset = flatbuf::CreateDate(fbb, flatbuf::DateUnit_MILLISECOND).Union(); break; - case Type::TIME: { - const auto& time_type = static_cast<const TimeType&>(*type); + case Type::TIME32: { + const auto& time_type = static_cast<const Time32Type&>(*type); + *out_type = flatbuf::Type_Time; + *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit)).Union(); + } break; + case Type::TIME64: { + const auto& time_type = static_cast<const Time64Type&>(*type); *out_type = flatbuf::Type_Time; *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit)).Union(); } break; http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/ipc/test-common.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h index 4085ecf..7ee57d2 100644 --- a/cpp/src/arrow/ipc/test-common.h +++ b/cpp/src/arrow/ipc/test-common.h @@ -505,20 +505,24 @@ Status MakeTimestamps(std::shared_ptr<RecordBatch>* out) { Status MakeTimes(std::shared_ptr<RecordBatch>* out) { std::vector<bool> is_valid = {true, true, true, 
false, true, true, true}; - auto f0 = field("f0", time(TimeUnit::MILLI)); - auto f1 = field("f1", time(TimeUnit::NANO)); - auto f2 = field("f2", time(TimeUnit::SECOND)); - std::shared_ptr<Schema> schema(new Schema({f0, f1, f2})); - - std::vector<int64_t> ts_values = {1489269000000, 1489270000000, 1489271000000, + auto f0 = field("f0", time32(TimeUnit::MILLI)); + auto f1 = field("f1", time64(TimeUnit::NANO)); + auto f2 = field("f2", time32(TimeUnit::SECOND)); + auto f3 = field("f3", time64(TimeUnit::NANO)); + std::shared_ptr<Schema> schema(new Schema({f0, f1, f2, f3})); + + std::vector<int32_t> t32_values = { + 1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000}; + std::vector<int64_t> t64_values = {1489269000000, 1489270000000, 1489271000000, 1489272000000, 1489272000000, 1489273000000}; - std::shared_ptr<Array> a0, a1, a2; - ArrayFromVector<TimeType, int64_t>(f0->type, is_valid, ts_values, &a0); - ArrayFromVector<TimeType, int64_t>(f1->type, is_valid, ts_values, &a1); - ArrayFromVector<TimeType, int64_t>(f2->type, is_valid, ts_values, &a2); + std::shared_ptr<Array> a0, a1, a2, a3; + ArrayFromVector<Time32Type, int32_t>(f0->type, is_valid, t32_values, &a0); + ArrayFromVector<Time64Type, int64_t>(f1->type, is_valid, t64_values, &a1); + ArrayFromVector<Time32Type, int32_t>(f2->type, is_valid, t32_values, &a2); + ArrayFromVector<Time64Type, int64_t>(f3->type, is_valid, t64_values, &a3); - ArrayVector arrays = {a0, a1, a2}; + ArrayVector arrays = {a0, a1, a2, a3}; *out = std::make_shared<RecordBatch>(schema, a0->length(), arrays); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/ipc/writer.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index dc991ab..e795ef9 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -334,8 +334,9 @@ class RecordBatchWriter : public ArrayVisitor { 
VISIT_FIXED_WIDTH(DoubleArray); VISIT_FIXED_WIDTH(Date32Array); VISIT_FIXED_WIDTH(Date64Array); - VISIT_FIXED_WIDTH(TimeArray); VISIT_FIXED_WIDTH(TimestampArray); + VISIT_FIXED_WIDTH(Time32Array); + VISIT_FIXED_WIDTH(Time64Array); #undef VISIT_FIXED_WIDTH http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/pretty_print.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index fc5eed1..0f67fe5 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -27,20 +27,17 @@ #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/string.h" +#include "arrow/visitor_inline.h" namespace arrow { -class ArrayPrinter : public ArrayVisitor { +class ArrayPrinter { public: ArrayPrinter(const Array& array, int indent, std::ostream* sink) : array_(array), indent_(indent), sink_(sink) {} - Status Print() { return VisitArray(array_); } - - Status VisitArray(const Array& array) { return array.Accept(this); } - template <typename T> - typename std::enable_if<IsInteger<T>::value, void>::type WriteDataValues( + inline typename std::enable_if<IsInteger<T>::value, void>::type WriteDataValues( const T& array) { const auto data = array.raw_data(); for (int i = 0; i < array.length(); ++i) { @@ -54,7 +51,7 @@ class ArrayPrinter : public ArrayVisitor { } template <typename T> - typename std::enable_if<IsFloatingPoint<T>::value, void>::type WriteDataValues( + inline typename std::enable_if<IsFloatingPoint<T>::value, void>::type WriteDataValues( const T& array) { const auto data = array.raw_data(); for (int i = 0; i < array.length(); ++i) { @@ -69,7 +66,7 @@ class ArrayPrinter : public ArrayVisitor { // String (Utf8) template <typename T> - typename std::enable_if<std::is_same<StringArray, T>::value, void>::type + inline typename std::enable_if<std::is_same<StringArray, T>::value, void>::type WriteDataValues(const T& array) { int32_t 
length; for (int i = 0; i < array.length(); ++i) { @@ -85,7 +82,7 @@ class ArrayPrinter : public ArrayVisitor { // Binary template <typename T> - typename std::enable_if<std::is_same<BinaryArray, T>::value, void>::type + inline typename std::enable_if<std::is_same<BinaryArray, T>::value, void>::type WriteDataValues(const T& array) { int32_t length; for (int i = 0; i < array.length(); ++i) { @@ -100,8 +97,9 @@ class ArrayPrinter : public ArrayVisitor { } template <typename T> - typename std::enable_if<std::is_same<FixedWidthBinaryArray, T>::value, void>::type - WriteDataValues(const T& array) { + inline + typename std::enable_if<std::is_same<FixedWidthBinaryArray, T>::value, void>::type + WriteDataValues(const T& array) { int32_t width = array.byte_width(); for (int i = 0; i < array.length(); ++i) { if (i > 0) { (*sink_) << ", "; } @@ -115,7 +113,7 @@ class ArrayPrinter : public ArrayVisitor { } template <typename T> - typename std::enable_if<std::is_base_of<BooleanArray, T>::value, void>::type + inline typename std::enable_if<std::is_base_of<BooleanArray, T>::value, void>::type WriteDataValues(const T& array) { for (int i = 0; i < array.length(); ++i) { if (i > 0) { (*sink_) << ", "; } @@ -127,83 +125,34 @@ class ArrayPrinter : public ArrayVisitor { } } - void OpenArray() { (*sink_) << "["; } + void Write(const char* data); + void Write(const std::string& data); + void Newline(); + void Indent(); + void OpenArray(); + void CloseArray(); - void CloseArray() { (*sink_) << "]"; } + Status Visit(const NullArray& array) { return Status::OK(); } template <typename T> - Status WriteArray(const T& array) { + typename std::enable_if<std::is_base_of<PrimitiveArray, T>::value || + std::is_base_of<FixedWidthBinaryArray, T>::value || + std::is_base_of<BinaryArray, T>::value, + Status>::type + Visit(const T& array) { OpenArray(); WriteDataValues(array); CloseArray(); return Status::OK(); } - Status Visit(const NullArray& array) override { return Status::OK(); } - - Status 
Visit(const BooleanArray& array) override { return WriteArray(array); } - - Status Visit(const Int8Array& array) override { return WriteArray(array); } - - Status Visit(const Int16Array& array) override { return WriteArray(array); } - - Status Visit(const Int32Array& array) override { return WriteArray(array); } - - Status Visit(const Int64Array& array) override { return WriteArray(array); } - - Status Visit(const UInt8Array& array) override { return WriteArray(array); } - - Status Visit(const UInt16Array& array) override { return WriteArray(array); } - - Status Visit(const UInt32Array& array) override { return WriteArray(array); } - - Status Visit(const UInt64Array& array) override { return WriteArray(array); } - - Status Visit(const HalfFloatArray& array) override { return WriteArray(array); } - - Status Visit(const FloatArray& array) override { return WriteArray(array); } - - Status Visit(const DoubleArray& array) override { return WriteArray(array); } - - Status Visit(const StringArray& array) override { return WriteArray(array); } - - Status Visit(const BinaryArray& array) override { return WriteArray(array); } - - Status Visit(const FixedWidthBinaryArray& array) override { return WriteArray(array); } - - Status Visit(const Date32Array& array) override { return WriteArray(array); } - - Status Visit(const Date64Array& array) override { return WriteArray(array); } - - Status Visit(const TimeArray& array) override { return WriteArray(array); } - - Status Visit(const TimestampArray& array) override { - return Status::NotImplemented("timestamp"); - } - - Status Visit(const IntervalArray& array) override { - return Status::NotImplemented("interval"); - } + Status Visit(const IntervalArray& array) { return Status::NotImplemented("interval"); } - Status Visit(const DecimalArray& array) override { - return Status::NotImplemented("decimal"); - } + Status Visit(const DecimalArray& array) { return Status::NotImplemented("decimal"); } - Status WriteValidityBitmap(const 
Array& array) { - Newline(); - Write("-- is_valid: "); - - if (array.null_count() > 0) { - BooleanArray is_valid( - array.length(), array.null_bitmap(), nullptr, 0, array.offset()); - return PrettyPrint(is_valid, indent_ + 2, sink_); - } else { - Write("all not null"); - return Status::OK(); - } - } + Status WriteValidityBitmap(const Array& array); - Status Visit(const ListArray& array) override { + Status Visit(const ListArray& array) { RETURN_NOT_OK(WriteValidityBitmap(array)); Newline(); @@ -239,12 +188,12 @@ class ArrayPrinter : public ArrayVisitor { return Status::OK(); } - Status Visit(const StructArray& array) override { + Status Visit(const StructArray& array) { RETURN_NOT_OK(WriteValidityBitmap(array)); return PrintChildren(array.fields(), array.offset(), array.length()); } - Status Visit(const UnionArray& array) override { + Status Visit(const UnionArray& array) { RETURN_NOT_OK(WriteValidityBitmap(array)); Newline(); @@ -264,7 +213,7 @@ class ArrayPrinter : public ArrayVisitor { return PrintChildren(array.children(), 0, array.length() + array.offset()); } - Status Visit(const DictionaryArray& array) override { + Status Visit(const DictionaryArray& array) { RETURN_NOT_OK(WriteValidityBitmap(array)); Newline(); @@ -276,20 +225,7 @@ class ArrayPrinter : public ArrayVisitor { return PrettyPrint(*array.indices(), indent_ + 2, sink_); } - void Write(const char* data) { (*sink_) << data; } - - void Write(const std::string& data) { (*sink_) << data; } - - void Newline() { - (*sink_) << "\n"; - Indent(); - } - - void Indent() { - for (int i = 0; i < indent_; ++i) { - (*sink_) << " "; - } - } + Status Print() { return VisitArrayInline(array_, this); } private: const Array& array_; @@ -298,6 +234,46 @@ class ArrayPrinter : public ArrayVisitor { std::ostream* sink_; }; +Status ArrayPrinter::WriteValidityBitmap(const Array& array) { + Newline(); + Write("-- is_valid: "); + + if (array.null_count() > 0) { + BooleanArray is_valid( + array.length(), array.null_bitmap(), 
nullptr, 0, array.offset()); + return PrettyPrint(is_valid, indent_ + 2, sink_); + } else { + Write("all not null"); + return Status::OK(); + } +} + +void ArrayPrinter::OpenArray() { + (*sink_) << "["; +} +void ArrayPrinter::CloseArray() { + (*sink_) << "]"; +} + +void ArrayPrinter::Write(const char* data) { + (*sink_) << data; +} + +void ArrayPrinter::Write(const std::string& data) { + (*sink_) << data; +} + +void ArrayPrinter::Newline() { + (*sink_) << "\n"; + Indent(); +} + +void ArrayPrinter::Indent() { + for (int i = 0; i < indent_; ++i) { + (*sink_) << " "; + } +} + Status PrettyPrint(const Array& arr, int indent, std::ostream* sink) { ArrayPrinter printer(arr, indent, sink); return printer.Print(); http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/type-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc index c2d115c..b6a84df 100644 --- a/cpp/src/arrow/type-test.cc +++ b/cpp/src/arrow/type-test.cc @@ -182,26 +182,30 @@ TEST(TestDateTypes, ToString) { } TEST(TestTimeType, Equals) { - TimeType t1; - TimeType t2; - TimeType t3(TimeUnit::NANO); - TimeType t4(TimeUnit::NANO); - - ASSERT_TRUE(t1.Equals(t2)); + Time32Type t0; + Time32Type t1(TimeUnit::SECOND); + Time32Type t2(TimeUnit::MILLI); + Time64Type t3(TimeUnit::MICRO); + Time64Type t4(TimeUnit::NANO); + Time64Type t5(TimeUnit::MICRO); + + ASSERT_TRUE(t0.Equals(t2)); + ASSERT_TRUE(t1.Equals(t1)); ASSERT_FALSE(t1.Equals(t3)); - ASSERT_TRUE(t3.Equals(t4)); + ASSERT_FALSE(t3.Equals(t4)); + ASSERT_TRUE(t3.Equals(t5)); } TEST(TestTimeType, ToString) { - auto t1 = time(TimeUnit::MILLI); - auto t2 = time(TimeUnit::NANO); - auto t3 = time(TimeUnit::SECOND); - auto t4 = time(TimeUnit::MICRO); - - ASSERT_EQ("time[ms]", t1->ToString()); - ASSERT_EQ("time[ns]", t2->ToString()); - ASSERT_EQ("time[s]", t3->ToString()); - ASSERT_EQ("time[us]", t4->ToString()); + auto t1 = time32(TimeUnit::MILLI); + auto t2 
= time64(TimeUnit::NANO); + auto t3 = time32(TimeUnit::SECOND); + auto t4 = time64(TimeUnit::MICRO); + + ASSERT_EQ("time32[ms]", t1->ToString()); + ASSERT_EQ("time64[ns]", t2->ToString()); + ASSERT_EQ("time32[s]", t3->ToString()); + ASSERT_EQ("time64[us]", t4->ToString()); } TEST(TestTimestampType, Equals) { http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/type.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 1c61eb6..3885022 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -119,12 +119,34 @@ std::string Date32Type::ToString() const { return std::string("date32[day]"); } -std::string TimeType::ToString() const { +// ---------------------------------------------------------------------- +// Time types + +Time32Type::Time32Type(TimeUnit unit) : FixedWidthType(Type::TIME32), unit(unit) { + DCHECK(unit == TimeUnit::SECOND || unit == TimeUnit::MILLI) + << "Must be seconds or milliseconds"; +} + +std::string Time32Type::ToString() const { + std::stringstream ss; + ss << "time32[" << this->unit << "]"; + return ss.str(); +} + +Time64Type::Time64Type(TimeUnit unit) : FixedWidthType(Type::TIME64), unit(unit) { + DCHECK(unit == TimeUnit::MICRO || unit == TimeUnit::NANO) + << "Must be microseconds or nanoseconds"; +} + +std::string Time64Type::ToString() const { std::stringstream ss; - ss << "time[" << this->unit << "]"; + ss << "time64[" << this->unit << "]"; return ss.str(); } +// ---------------------------------------------------------------------- +// Timestamp types + std::string TimestampType::ToString() const { std::stringstream ss; ss << "timestamp[" << this->unit; @@ -138,7 +160,7 @@ std::string TimestampType::ToString() const { UnionType::UnionType(const std::vector<std::shared_ptr<Field>>& fields, const std::vector<uint8_t>& type_codes, UnionMode mode) - : DataType(Type::UNION), mode(mode), type_codes(type_codes) { + : 
NestedType(Type::UNION), mode(mode), type_codes(type_codes) { children_ = fields; } @@ -206,9 +228,10 @@ ACCEPT_VISITOR(ListType); ACCEPT_VISITOR(StructType); ACCEPT_VISITOR(DecimalType); ACCEPT_VISITOR(UnionType); -ACCEPT_VISITOR(Date64Type); ACCEPT_VISITOR(Date32Type); -ACCEPT_VISITOR(TimeType); +ACCEPT_VISITOR(Date64Type); +ACCEPT_VISITOR(Time32Type); +ACCEPT_VISITOR(Time64Type); ACCEPT_VISITOR(TimestampType); ACCEPT_VISITOR(IntervalType); ACCEPT_VISITOR(DictionaryType); @@ -249,8 +272,12 @@ std::shared_ptr<DataType> timestamp(TimeUnit unit, const std::string& timezone) return std::make_shared<TimestampType>(unit, timezone); } -std::shared_ptr<DataType> time(TimeUnit unit) { - return std::make_shared<TimeType>(unit); +std::shared_ptr<DataType> time32(TimeUnit unit) { + return std::make_shared<Time32Type>(unit); +} + +std::shared_ptr<DataType> time64(TimeUnit unit) { + return std::make_shared<Time64Type>(unit); } std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type) { http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/type.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 40c00a4..7ae5ae3 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -82,8 +82,13 @@ struct Type { // Default unit millisecond TIMESTAMP, - // Exact time encoded with int64, default unit millisecond - TIME, + // Time as signed 32-bit integer, representing either seconds or + // milliseconds since midnight + TIME32, + + // Time as signed 64-bit integer, representing either microseconds or + // nanoseconds since midnight + TIME64, // YEAR_MONTH or DAY_TIME interval in SQL style INTERVAL, @@ -147,6 +152,9 @@ struct ARROW_EXPORT DataType { virtual std::string ToString() const = 0; virtual std::vector<BufferDescr> GetBufferLayout() const = 0; + + private: + DISALLOW_COPY_AND_ASSIGN(DataType); }; typedef std::shared_ptr<DataType> TypePtr; @@ -168,6 +176,10 @@ 
struct ARROW_EXPORT FloatingPointMeta { virtual Precision precision() const = 0; }; +struct ARROW_EXPORT NestedType : public DataType { + using DataType::DataType; +}; + struct NoExtraMeta {}; // A field is a piece of metadata that includes (for now) a name and a data @@ -298,14 +310,14 @@ struct ARROW_EXPORT DoubleType : public CTypeImpl<DoubleType, Type::DOUBLE, doub static std::string name() { return "double"; } }; -struct ARROW_EXPORT ListType : public DataType, public NoExtraMeta { +struct ARROW_EXPORT ListType : public NestedType { static constexpr Type::type type_id = Type::LIST; // List can contain any other logical value type explicit ListType(const std::shared_ptr<DataType>& value_type) : ListType(std::make_shared<Field>("item", value_type)) {} - explicit ListType(const std::shared_ptr<Field>& value_field) : DataType(Type::LIST) { + explicit ListType(const std::shared_ptr<Field>& value_field) : NestedType(Type::LIST) { children_ = {value_field}; } @@ -369,11 +381,11 @@ struct ARROW_EXPORT StringType : public BinaryType { static std::string name() { return "utf8"; } }; -struct ARROW_EXPORT StructType : public DataType, public NoExtraMeta { +struct ARROW_EXPORT StructType : public NestedType { static constexpr Type::type type_id = Type::STRUCT; explicit StructType(const std::vector<std::shared_ptr<Field>>& fields) - : DataType(Type::STRUCT) { + : NestedType(Type::STRUCT) { children_ = fields; } @@ -401,7 +413,7 @@ struct ARROW_EXPORT DecimalType : public DataType { enum class UnionMode : char { SPARSE, DENSE }; -struct ARROW_EXPORT UnionType : public DataType { +struct ARROW_EXPORT UnionType : public NestedType { static constexpr Type::type type_id = Type::UNION; UnionType(const std::vector<std::shared_ptr<Field>>& fields, @@ -473,8 +485,23 @@ static inline std::ostream& operator<<(std::ostream& os, TimeUnit unit) { return os; } -struct ARROW_EXPORT TimeType : public FixedWidthType { - static constexpr Type::type type_id = Type::TIME; +struct ARROW_EXPORT 
Time32Type : public FixedWidthType { + static constexpr Type::type type_id = Type::TIME32; + using Unit = TimeUnit; + using c_type = int32_t; + + TimeUnit unit; + + int bit_width() const override { return static_cast<int>(sizeof(c_type) * 8); } + + explicit Time32Type(TimeUnit unit = TimeUnit::MILLI); + + Status Accept(TypeVisitor* visitor) const override; + std::string ToString() const override; +}; + +struct ARROW_EXPORT Time64Type : public FixedWidthType { + static constexpr Type::type type_id = Type::TIME64; using Unit = TimeUnit; using c_type = int64_t; @@ -482,9 +509,7 @@ struct ARROW_EXPORT TimeType : public FixedWidthType { int bit_width() const override { return static_cast<int>(sizeof(c_type) * 8); } - explicit TimeType(TimeUnit unit = TimeUnit::MILLI) - : FixedWidthType(Type::TIME), unit(unit) {} - TimeType(const TimeType& other) : TimeType(other.unit) {} + explicit Time64Type(TimeUnit unit = TimeUnit::MILLI); Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; @@ -504,8 +529,6 @@ struct ARROW_EXPORT TimestampType : public FixedWidthType { explicit TimestampType(TimeUnit unit, const std::string& timezone) : FixedWidthType(Type::TIMESTAMP), unit(unit), timezone(timezone) {} - TimestampType(const TimestampType& other) : TimestampType(other.unit) {} - Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; static std::string name() { return "timestamp"; } @@ -527,8 +550,6 @@ struct ARROW_EXPORT IntervalType : public FixedWidthType { explicit IntervalType(Unit unit = Unit::YEAR_MONTH) : FixedWidthType(Type::INTERVAL), unit(unit) {} - IntervalType(const IntervalType& other) : IntervalType(other.unit) {} - Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override { return name(); } static std::string name() { return "date"; } @@ -573,7 +594,12 @@ std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<DataType>& val std::shared_ptr<DataType>
ARROW_EXPORT timestamp(TimeUnit unit); std::shared_ptr<DataType> ARROW_EXPORT timestamp( TimeUnit unit, const std::string& timezone); -std::shared_ptr<DataType> ARROW_EXPORT time(TimeUnit unit); + +/// Unit can be either SECOND or MILLI +std::shared_ptr<DataType> ARROW_EXPORT time32(TimeUnit unit); + +/// Unit can be either MICRO or NANO +std::shared_ptr<DataType> ARROW_EXPORT time64(TimeUnit unit); std::shared_ptr<DataType> ARROW_EXPORT struct_( const std::vector<std::shared_ptr<Field>>& fields); @@ -637,8 +663,9 @@ static inline bool is_primitive(Type::type type_id) { case Type::DOUBLE: case Type::DATE32: case Type::DATE64: + case Type::TIME32: + case Type::TIME64: case Type::TIMESTAMP: - case Type::TIME: case Type::INTERVAL: return true; default: http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/type_fwd.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h index f62c031..201f4e9 100644 --- a/cpp/src/arrow/type_fwd.h +++ b/cpp/src/arrow/type_fwd.h @@ -105,9 +105,13 @@ struct Date32Type; using Date32Array = NumericArray<Date32Type>; using Date32Builder = NumericBuilder<Date32Type>; -struct TimeType; -using TimeArray = NumericArray<TimeType>; -using TimeBuilder = NumericBuilder<TimeType>; +struct Time32Type; +using Time32Array = NumericArray<Time32Type>; +using Time32Builder = NumericBuilder<Time32Type>; + +struct Time64Type; +using Time64Array = NumericArray<Time64Type>; +using Time64Builder = NumericBuilder<Time64Type>; struct TimestampType; using TimestampArray = NumericArray<TimestampType>; @@ -134,6 +138,7 @@ std::shared_ptr<DataType> ARROW_EXPORT float32(); std::shared_ptr<DataType> ARROW_EXPORT float64(); std::shared_ptr<DataType> ARROW_EXPORT utf8(); std::shared_ptr<DataType> ARROW_EXPORT binary(); + std::shared_ptr<DataType> ARROW_EXPORT date32(); std::shared_ptr<DataType> ARROW_EXPORT date64(); 
http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/type_traits.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index e731913..f735d27 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -29,6 +29,12 @@ template <typename T> struct TypeTraits {}; template <> +struct TypeTraits<NullType> { + using ArrayType = NullArray; + constexpr static bool is_parameter_free = false; +}; + +template <> struct TypeTraits<UInt8Type> { using ArrayType = UInt8Array; using BuilderType = UInt8Builder; @@ -154,9 +160,20 @@ struct TypeTraits<TimestampType> { }; template <> -struct TypeTraits<TimeType> { - using ArrayType = TimeArray; - using BuilderType = TimeBuilder; +struct TypeTraits<Time32Type> { + using ArrayType = Time32Array; + using BuilderType = Time32Builder; + + static inline int64_t bytes_required(int64_t elements) { + return elements * sizeof(int32_t); + } + constexpr static bool is_parameter_free = false; +}; + +template <> +struct TypeTraits<Time64Type> { + using ArrayType = Time64Array; + using BuilderType = Time64Builder; static inline int64_t bytes_required(int64_t elements) { return elements * sizeof(int64_t); @@ -235,6 +252,32 @@ struct TypeTraits<FixedWidthBinaryType> { constexpr static bool is_parameter_free = false; }; +template <> +struct TypeTraits<ListType> { + using ArrayType = ListArray; + using BuilderType = ListBuilder; + constexpr static bool is_parameter_free = false; +}; + +template <> +struct TypeTraits<StructType> { + using ArrayType = StructArray; + using BuilderType = StructBuilder; + constexpr static bool is_parameter_free = false; +}; + +template <> +struct TypeTraits<UnionType> { + using ArrayType = UnionArray; + constexpr static bool is_parameter_free = false; +}; + +template <> +struct TypeTraits<DictionaryType> { + using ArrayType = DictionaryArray; + constexpr static bool is_parameter_free = false; +}; + 
// Not all type classes have a c_type template <typename T> struct as_void { http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/visitor.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/visitor.cc b/cpp/src/arrow/visitor.cc index 181e932..9200e0f 100644 --- a/cpp/src/arrow/visitor.cc +++ b/cpp/src/arrow/visitor.cc @@ -46,7 +46,8 @@ ARRAY_VISITOR_DEFAULT(StringArray); ARRAY_VISITOR_DEFAULT(FixedWidthBinaryArray); ARRAY_VISITOR_DEFAULT(Date32Array); ARRAY_VISITOR_DEFAULT(Date64Array); -ARRAY_VISITOR_DEFAULT(TimeArray); +ARRAY_VISITOR_DEFAULT(Time32Array); +ARRAY_VISITOR_DEFAULT(Time64Array); ARRAY_VISITOR_DEFAULT(TimestampArray); ARRAY_VISITOR_DEFAULT(IntervalArray); ARRAY_VISITOR_DEFAULT(ListArray); @@ -84,7 +85,8 @@ TYPE_VISITOR_DEFAULT(BinaryType); TYPE_VISITOR_DEFAULT(FixedWidthBinaryType); TYPE_VISITOR_DEFAULT(Date64Type); TYPE_VISITOR_DEFAULT(Date32Type); -TYPE_VISITOR_DEFAULT(TimeType); +TYPE_VISITOR_DEFAULT(Time32Type); +TYPE_VISITOR_DEFAULT(Time64Type); TYPE_VISITOR_DEFAULT(TimestampType); TYPE_VISITOR_DEFAULT(IntervalType); TYPE_VISITOR_DEFAULT(DecimalType); http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/visitor.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/visitor.h b/cpp/src/arrow/visitor.h index a9c59c8..d44dcf6 100644 --- a/cpp/src/arrow/visitor.h +++ b/cpp/src/arrow/visitor.h @@ -46,7 +46,8 @@ class ARROW_EXPORT ArrayVisitor { virtual Status Visit(const FixedWidthBinaryArray& array); virtual Status Visit(const Date32Array& array); virtual Status Visit(const Date64Array& array); - virtual Status Visit(const TimeArray& array); + virtual Status Visit(const Time32Array& array); + virtual Status Visit(const Time64Array& array); virtual Status Visit(const TimestampArray& array); virtual Status Visit(const IntervalArray& array); virtual Status Visit(const DecimalArray& array); @@ -78,7 +79,8 @@ class 
ARROW_EXPORT TypeVisitor { virtual Status Visit(const FixedWidthBinaryType& type); virtual Status Visit(const Date64Type& type); virtual Status Visit(const Date32Type& type); - virtual Status Visit(const TimeType& type); + virtual Status Visit(const Time32Type& type); + virtual Status Visit(const Time64Type& type); virtual Status Visit(const TimestampType& type); virtual Status Visit(const IntervalType& type); virtual Status Visit(const DecimalType& type); http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/cpp/src/arrow/visitor_inline.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/visitor_inline.h b/cpp/src/arrow/visitor_inline.h index b69468d..0ea16bc 100644 --- a/cpp/src/arrow/visitor_inline.h +++ b/cpp/src/arrow/visitor_inline.h @@ -51,7 +51,8 @@ inline Status VisitTypeInline(const DataType& type, VISITOR* visitor) { TYPE_VISIT_INLINE(Date32Type); TYPE_VISIT_INLINE(Date64Type); TYPE_VISIT_INLINE(TimestampType); - TYPE_VISIT_INLINE(TimeType); + TYPE_VISIT_INLINE(Time32Type); + TYPE_VISIT_INLINE(Time64Type); TYPE_VISIT_INLINE(ListType); TYPE_VISIT_INLINE(StructType); TYPE_VISIT_INLINE(UnionType); @@ -62,6 +63,44 @@ inline Status VisitTypeInline(const DataType& type, VISITOR* visitor) { return Status::NotImplemented("Type not implemented"); } +#define ARRAY_VISIT_INLINE(TYPE_CLASS) \ + case TYPE_CLASS::type_id: \ + return visitor->Visit( \ + static_cast<const typename TypeTraits<TYPE_CLASS>::ArrayType&>(array)); + +template <typename VISITOR> +inline Status VisitArrayInline(const Array& array, VISITOR* visitor) { + switch (array.type_enum()) { + ARRAY_VISIT_INLINE(NullType); + ARRAY_VISIT_INLINE(BooleanType); + ARRAY_VISIT_INLINE(Int8Type); + ARRAY_VISIT_INLINE(UInt8Type); + ARRAY_VISIT_INLINE(Int16Type); + ARRAY_VISIT_INLINE(UInt16Type); + ARRAY_VISIT_INLINE(Int32Type); + ARRAY_VISIT_INLINE(UInt32Type); + ARRAY_VISIT_INLINE(Int64Type); + ARRAY_VISIT_INLINE(UInt64Type); + ARRAY_VISIT_INLINE(FloatType); + 
ARRAY_VISIT_INLINE(DoubleType); + ARRAY_VISIT_INLINE(StringType); + ARRAY_VISIT_INLINE(BinaryType); + ARRAY_VISIT_INLINE(FixedWidthBinaryType); + ARRAY_VISIT_INLINE(Date32Type); + ARRAY_VISIT_INLINE(Date64Type); + ARRAY_VISIT_INLINE(TimestampType); + ARRAY_VISIT_INLINE(Time32Type); + ARRAY_VISIT_INLINE(Time64Type); + ARRAY_VISIT_INLINE(ListType); + ARRAY_VISIT_INLINE(StructType); + ARRAY_VISIT_INLINE(UnionType); + ARRAY_VISIT_INLINE(DictionaryType); + default: + break; + } + return Status::NotImplemented("Type not implemented"); +} + } // namespace arrow #endif // ARROW_VISITOR_INLINE_H http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/python/pyarrow/array.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx index 795076c..654f5ab 100644 --- a/python/pyarrow/array.pyx +++ b/python/pyarrow/array.pyx @@ -383,6 +383,12 @@ cdef class Date64Array(NumericArray): cdef class TimestampArray(NumericArray): pass +cdef class Time32Array(NumericArray): + pass + + +cdef class Time64Array(NumericArray): + pass cdef class FloatArray(FloatingPointArray): pass @@ -490,12 +496,14 @@ cdef dict _array_classes = { Type_INT64: Int64Array, Type_DATE32: Date32Array, Type_DATE64: Date64Array, + Type_TIMESTAMP: TimestampArray, + Type_TIME32: Time32Array, + Type_TIME64: Time64Array, Type_FLOAT: FloatArray, Type_DOUBLE: DoubleArray, Type_LIST: ListArray, Type_BINARY: BinaryArray, Type_STRING: StringArray, - Type_TIMESTAMP: TimestampArray, Type_DICTIONARY: DictionaryArray } http://git-wip-us.apache.org/repos/asf/arrow/blob/60b5832e/python/pyarrow/includes/libarrow.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 1d9c38e..bdbd18b 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -38,9 +38,11 @@ cdef extern from "arrow/api.h" namespace 
"arrow" nogil: Type_FLOAT" arrow::Type::FLOAT" Type_DOUBLE" arrow::Type::DOUBLE" - Type_TIMESTAMP" arrow::Type::TIMESTAMP" Type_DATE32" arrow::Type::DATE32" Type_DATE64" arrow::Type::DATE64" + Type_TIMESTAMP" arrow::Type::TIMESTAMP" + Type_TIME32" arrow::Type::TIME32" + Type_TIME64" arrow::Type::TIME64" Type_BINARY" arrow::Type::BINARY" Type_STRING" arrow::Type::STRING" @@ -85,11 +87,20 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CArray] indices() shared_ptr[CArray] dictionary() + cdef cppclass CDate32Type" arrow::Date32Type"(CFixedWidthType): + pass + + cdef cppclass CDate64Type" arrow::Date64Type"(CFixedWidthType): + pass + cdef cppclass CTimestampType" arrow::TimestampType"(CFixedWidthType): TimeUnit unit c_string timezone - cdef cppclass CTimeType" arrow::TimeType"(CFixedWidthType): + cdef cppclass CTime32Type" arrow::Time32Type"(CFixedWidthType): + TimeUnit unit + + cdef cppclass CTime64Type" arrow::Time64Type"(CFixedWidthType): TimeUnit unit cdef cppclass CDictionaryType" arrow::DictionaryType"(CFixedWidthType):