This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new d831e2c ARROW-47: [C++] Preliminary arrow::Scalar object model
d831e2c is described below
commit d831e2ce5931f520dc8be2ab3cf2d243999f85ed
Author: Wes McKinney <[email protected]>
AuthorDate: Wed Feb 13 09:26:12 2019 -0600
ARROW-47: [C++] Preliminary arrow::Scalar object model
This is the start of a Scalar object model suitable for static and dynamic
dispatch to correspond with the existing array and array builder types.
I modified the first aggregation kernel (sum) to use these types for
outputs.
Author: Wes McKinney <[email protected]>
Closes #3604 from wesm/ARROW-47 and squashes the following commits:
0d01bb3fc <Wes McKinney> Fix unit test on MSVC for small integer types
d57f7aa3f <Wes McKinney> Remove ARROW_GTEST_VENDORED
03ca01cd0 <Wes McKinney> Changes because MSVC tries to instantiate
NumericScalar for Time/Timestamp types
271d60216 <Wes McKinney> Add notes that Scalar API is experimental
e4a13b4c6 <Wes McKinney> flake
66260358c <Wes McKinney> Fix up date/time scalars, add tests
704daee83 <Wes McKinney> Code review comments
d922bd282 <Wes McKinney> Use new Scalar objects in aggregation code
fa89bd077 <Wes McKinney> Drafting, untested
94d5e6242 <Wes McKinney> start
---
cpp/build-support/run_cpplint.py | 2 +
cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 +
cpp/src/arrow/CMakeLists.txt | 4 +-
cpp/src/arrow/compute/compute-test.cc | 3 +
cpp/src/arrow/compute/kernel.h | 61 ++------
cpp/src/arrow/compute/kernels/aggregate-test.cc | 69 ++++-----
cpp/src/arrow/compute/kernels/sum.cc | 23 ++-
cpp/src/arrow/compute/kernels/sum.h | 19 ++-
cpp/src/arrow/scalar-test.cc | 189 ++++++++++++++++++++++++
cpp/src/arrow/scalar.cc | 69 +++++++++
cpp/src/arrow/scalar.h | 152 +++++++++++++++++++
cpp/src/arrow/testing/gtest_util.h | 7 +
cpp/src/arrow/type_fwd.h | 19 +++
cpp/src/arrow/type_traits.h | 52 ++++---
run-cmake-format.py | 3 +-
15 files changed, 554 insertions(+), 119 deletions(-)
diff --git a/cpp/build-support/run_cpplint.py b/cpp/build-support/run_cpplint.py
index 035a02e..171f173 100755
--- a/cpp/build-support/run_cpplint.py
+++ b/cpp/build-support/run_cpplint.py
@@ -112,6 +112,8 @@ if __name__ == "__main__":
if problem_files:
msg = "{} had cpplint issues"
print("\n".join(map(msg.format, problem_files)))
+ if isinstance(stdout, bytes):
+ stdout = stdout.decode('utf8')
print(stdout, file=sys.stderr)
error = True
except Exception:
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 51bd39d..94ce527 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -676,6 +676,7 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
endif()
set(GTEST_PREFIX
"${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-prefix/src/googletest_ep")
+ set(GTEST_HOME ${GTEST_PREFIX})
set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include")
set(GTEST_STATIC_LIB
"${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_GTEST_DEBUG_EXTENSION}${CMAKE_STATIC_LIBRARY_SUFFIX}")
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index aa09968..b2aebe6 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -90,11 +90,12 @@ set(ARROW_SRCS
memory_pool.cc
pretty_print.cc
record_batch.cc
+ scalar.cc
+ sparse_tensor.cc
status.cc
table.cc
table_builder.cc
tensor.cc
- sparse_tensor.cc
type.cc
visitor.cc
csv/converter.cc
@@ -306,6 +307,7 @@ add_arrow_test(buffer-test)
add_arrow_test(memory_pool-test)
add_arrow_test(pretty_print-test)
add_arrow_test(public-api-test)
+add_arrow_test(scalar-test)
add_arrow_test(status-test)
add_arrow_test(stl-test)
add_arrow_test(type-test)
diff --git a/cpp/src/arrow/compute/compute-test.cc
b/cpp/src/arrow/compute/compute-test.cc
index 0d18695..a90dad1 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -59,6 +59,8 @@ void CheckImplicitConstructor(enum Datum::type expected_kind)
{
}
TEST(TestDatum, ImplicitConstructors) {
+ CheckImplicitConstructor<Scalar>(Datum::SCALAR);
+
CheckImplicitConstructor<Array>(Datum::ARRAY);
// Instantiate from array subclass
@@ -66,6 +68,7 @@ TEST(TestDatum, ImplicitConstructors) {
CheckImplicitConstructor<ChunkedArray>(Datum::CHUNKED_ARRAY);
CheckImplicitConstructor<RecordBatch>(Datum::RECORD_BATCH);
+
CheckImplicitConstructor<Table>(Datum::TABLE);
}
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index 2270a48..38b78ca 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -24,6 +24,7 @@
#include "arrow/array.h"
#include "arrow/record_batch.h"
+#include "arrow/scalar.h"
#include "arrow/table.h"
#include "arrow/util/macros.h"
#include "arrow/util/variant.h" // IWYU pragma: export
@@ -55,60 +56,12 @@ class ARROW_EXPORT OpKernel {
virtual ~OpKernel() = default;
};
-/// \brief Placeholder for Scalar values until we implement these
-struct ARROW_EXPORT Scalar {
- util::variant<bool, uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t,
uint64_t,
- int64_t, float, double>
- value;
-
- explicit Scalar(bool value) : value(value) {}
- explicit Scalar(uint8_t value) : value(value) {}
- explicit Scalar(int8_t value) : value(value) {}
- explicit Scalar(uint16_t value) : value(value) {}
- explicit Scalar(int16_t value) : value(value) {}
- explicit Scalar(uint32_t value) : value(value) {}
- explicit Scalar(int32_t value) : value(value) {}
- explicit Scalar(uint64_t value) : value(value) {}
- explicit Scalar(int64_t value) : value(value) {}
- explicit Scalar(float value) : value(value) {}
- explicit Scalar(double value) : value(value) {}
-
- Type::type kind() const {
- switch (this->value.which()) {
- case 0:
- return Type::BOOL;
- case 1:
- return Type::UINT8;
- case 2:
- return Type::INT8;
- case 3:
- return Type::UINT16;
- case 4:
- return Type::INT16;
- case 5:
- return Type::UINT32;
- case 6:
- return Type::INT32;
- case 7:
- return Type::UINT64;
- case 8:
- return Type::INT64;
- case 9:
- return Type::FLOAT;
- case 10:
- return Type::DOUBLE;
- default:
- return Type::NA;
- }
- }
-};
-
/// \class Datum
/// \brief Variant type for various Arrow C++ data structures
struct ARROW_EXPORT Datum {
enum type { NONE, SCALAR, ARRAY, CHUNKED_ARRAY, RECORD_BATCH, TABLE,
COLLECTION };
- util::variant<decltype(NULLPTR), Scalar, std::shared_ptr<ArrayData>,
+ util::variant<decltype(NULLPTR), std::shared_ptr<Scalar>,
std::shared_ptr<ArrayData>,
std::shared_ptr<ChunkedArray>, std::shared_ptr<RecordBatch>,
std::shared_ptr<Table>, std::vector<Datum>>
value;
@@ -116,7 +69,7 @@ struct ARROW_EXPORT Datum {
/// \brief Empty datum, to be populated elsewhere
Datum() : value(NULLPTR) {}
- Datum(const Scalar& value) // NOLINT implicit conversion
+ Datum(const std::shared_ptr<Scalar>& value) // NOLINT implicit conversion
: value(value) {}
Datum(const std::shared_ptr<ArrayData>& value) // NOLINT implicit conversion
: value(value) {}
@@ -188,7 +141,9 @@ struct ARROW_EXPORT Datum {
return util::get<std::vector<Datum>>(this->value);
}
- Scalar scalar() const { return util::get<Scalar>(this->value); }
+ std::shared_ptr<Scalar> scalar() const {
+ return util::get<std::shared_ptr<Scalar>>(this->value);
+ }
bool is_array() const { return this->kind() == Datum::ARRAY; }
@@ -196,6 +151,8 @@ struct ARROW_EXPORT Datum {
return this->kind() == Datum::ARRAY || this->kind() ==
Datum::CHUNKED_ARRAY;
}
+ bool is_scalar() const { return this->kind() == Datum::SCALAR; }
+
/// \brief The value type of the variant, if any
///
/// \return nullptr if no type
@@ -204,6 +161,8 @@ struct ARROW_EXPORT Datum {
return util::get<std::shared_ptr<ArrayData>>(this->value)->type;
} else if (this->kind() == Datum::CHUNKED_ARRAY) {
return util::get<std::shared_ptr<ChunkedArray>>(this->value)->type();
+ } else if (this->kind() == Datum::SCALAR) {
+ return util::get<std::shared_ptr<Scalar>>(this->value)->type;
}
return NULLPTR;
}
diff --git a/cpp/src/arrow/compute/kernels/aggregate-test.cc
b/cpp/src/arrow/compute/kernels/aggregate-test.cc
index 160b162..d522e1b 100644
--- a/cpp/src/arrow/compute/kernels/aggregate-test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate-test.cc
@@ -25,6 +25,8 @@
#include "arrow/compute/kernels/sum.h"
#include "arrow/compute/test-util.h"
#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
#include "arrow/testing/gtest_common.h"
#include "arrow/testing/gtest_util.h"
@@ -36,47 +38,47 @@ using std::vector;
namespace arrow {
namespace compute {
-template <typename CType, typename Enable = void>
+template <typename Type, typename Enable = void>
struct DatumEqual {
static void EnsureEqual(const Datum& lhs, const Datum& rhs) {}
};
-template <typename CType>
-struct DatumEqual<CType,
- typename
std::enable_if<std::is_floating_point<CType>::value>::type> {
+// Specialization for floating-point Arrow types: scalar values are compared
+// with ASSERT_NEAR (tolerance kArbitraryDoubleErrorBound) instead of exactly.
+// The enable_if condition must read the lowercase `value` member, as the
+// sibling specialization does; a misspelled member makes SFINAE drop this
+// specialization silently, and the primary template compares nothing.
+template <typename Type>
+struct DatumEqual<Type, typename std::enable_if<IsFloatingPoint<Type>::value>::type> {
   static constexpr double kArbitraryDoubleErrorBound = 1.0;
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
   static void EnsureEqual(const Datum& lhs, const Datum& rhs) {
     ASSERT_EQ(lhs.kind(), rhs.kind());
     if (lhs.kind() == Datum::SCALAR) {
-      ASSERT_EQ(lhs.scalar().kind(), rhs.scalar().kind());
-      ASSERT_NEAR(util::get<CType>(lhs.scalar().value),
-                  util::get<CType>(rhs.scalar().value), kArbitraryDoubleErrorBound);
+      auto left = static_cast<const ScalarType*>(lhs.scalar().get());
+      auto right = static_cast<const ScalarType*>(rhs.scalar().get());
+      ASSERT_EQ(left->type->id(), right->type->id());
+      ASSERT_NEAR(left->value, right->value, kArbitraryDoubleErrorBound);
     }
   }
 };
-template <typename CType>
-struct DatumEqual<CType,
- typename
std::enable_if<!std::is_floating_point<CType>::value>::type> {
+// Specialization for every non-floating-point Arrow type: scalar values must
+// match exactly (ASSERT_EQ) after the type ids have been checked.
+template <typename Type>
+struct DatumEqual<Type, typename std::enable_if<!IsFloatingPoint<Type>::value>::type> {
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
   static void EnsureEqual(const Datum& lhs, const Datum& rhs) {
     ASSERT_EQ(lhs.kind(), rhs.kind());
     if (lhs.kind() == Datum::SCALAR) {
-      ASSERT_EQ(lhs.scalar().kind(), rhs.scalar().kind());
-      ASSERT_EQ(util::get<CType>(lhs.scalar().value),
-                util::get<CType>(rhs.scalar().value));
+      auto left = static_cast<const ScalarType*>(lhs.scalar().get());
+      auto right = static_cast<const ScalarType*>(rhs.scalar().get());
+      ASSERT_EQ(left->type->id(), right->type->id());
+      ASSERT_EQ(left->value, right->value);
     }
   }
 };
template <typename ArrowType>
void ValidateSum(FunctionContext* ctx, const Array& input, Datum expected) {
- using CType = typename ArrowType::c_type;
- using SumType = typename FindAccumulatorType<CType>::Type;
-
+ using OutputType = typename FindAccumulatorType<ArrowType>::Type;
Datum result;
ASSERT_OK(Sum(ctx, input, &result));
- DatumEqual<SumType>::EnsureEqual(result, expected);
+ DatumEqual<OutputType>::EnsureEqual(result, expected);
}
template <typename ArrowType>
@@ -87,11 +89,11 @@ void ValidateSum(FunctionContext* ctx, const char* json,
Datum expected) {
template <typename ArrowType>
static Datum DummySum(const Array& array) {
- using CType = typename ArrowType::c_type;
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
- using SumType = typename FindAccumulatorType<CType>::Type;
+ using SumType = typename FindAccumulatorType<ArrowType>::Type;
+ using SumScalarType = typename TypeTraits<SumType>::ScalarType;
- SumType sum = 0;
+ typename SumType::c_type sum = 0;
int64_t count = 0;
const auto& array_numeric = reinterpret_cast<const ArrayType&>(array);
@@ -104,7 +106,11 @@ static Datum DummySum(const Array& array) {
}
}
- return (count > 0) ? Datum(Scalar(sum)) : Datum();
+ if (count > 0) {
+ return Datum(std::make_shared<SumScalarType>(sum));
+ } else {
+ return Datum(std::make_shared<SumScalarType>(0, false));
+ }
}
template <typename ArrowType>
@@ -115,24 +121,21 @@ void ValidateSum(FunctionContext* ctx, const Array&
array) {
template <typename ArrowType>
class TestSumKernelNumeric : public ComputeFixture, public TestBase {};
-typedef ::testing::Types<Int8Type, UInt8Type, Int16Type, UInt16Type, Int32Type,
- UInt32Type, Int64Type, UInt64Type, FloatType,
DoubleType>
- NumericArrowTypes;
-
TYPED_TEST_CASE(TestSumKernelNumeric, NumericArrowTypes);
TYPED_TEST(TestSumKernelNumeric, SimpleSum) {
- using CType = typename TypeParam::c_type;
- using SumType = typename FindAccumulatorType<CType>::Type;
+ using SumType = typename FindAccumulatorType<TypeParam>::Type;
+ using ScalarType = typename TypeTraits<SumType>::ScalarType;
+ using T = typename TypeParam::c_type;
- ValidateSum<TypeParam>(&this->ctx_, "[]", Datum());
+ ValidateSum<TypeParam>(&this->ctx_, "[]",
+ Datum(std::make_shared<ScalarType>(0, false)));
ValidateSum<TypeParam>(&this->ctx_, "[0, 1, 2, 3, 4, 5]",
- Datum(Scalar(static_cast<SumType>(5 * 6 / 2))));
+ Datum(std::make_shared<ScalarType>(static_cast<T>(5 *
6 / 2))));
- // Avoid this tests for (U)Int8Type
- if (sizeof(CType) > 1)
- ValidateSum<TypeParam>(&this->ctx_, "[1000, null, 300, null, 30, null, 7]",
- Datum(Scalar(static_cast<SumType>(1337))));
+ const T expected_result = static_cast<T>(14);
+ ValidateSum<TypeParam>(&this->ctx_, "[1, null, 3, null, 3, null, 7]",
+ Datum(std::make_shared<ScalarType>(expected_result)));
}
template <typename ArrowType>
diff --git a/cpp/src/arrow/compute/kernels/sum.cc
b/cpp/src/arrow/compute/kernels/sum.cc
index cb37c4a..007412a 100644
--- a/cpp/src/arrow/compute/kernels/sum.cc
+++ b/cpp/src/arrow/compute/kernels/sum.cc
@@ -28,9 +28,10 @@
namespace arrow {
namespace compute {
-template <typename CType, typename SumType = typename
FindAccumulatorType<CType>::Type>
+template <typename ArrowType,
+ typename SumType = typename FindAccumulatorType<ArrowType>::Type>
struct SumState {
- using ThisType = SumState<CType, SumType>;
+ using ThisType = SumState<ArrowType, SumType>;
ThisType operator+(const ThisType& rhs) const {
return ThisType(this->count + rhs.count, this->sum + rhs.sum);
@@ -43,11 +44,16 @@ struct SumState {
return *this;
}
+ std::shared_ptr<Scalar> AsScalar() const {
+ using ScalarType = typename TypeTraits<SumType>::ScalarType;
+ return std::make_shared<ScalarType>(this->sum);
+ }
+
size_t count = 0;
- SumType sum = 0;
+ typename SumType::c_type sum = 0;
};
-template <typename ArrowType, typename StateType = SumState<typename
ArrowType::c_type>>
+template <typename ArrowType, typename StateType = SumState<ArrowType>>
class SumAggregateFunction final : public
AggregateFunctionStaticState<StateType> {
using CType = typename TypeTraits<ArrowType>::CType;
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
@@ -71,7 +77,12 @@ class SumAggregateFunction final : public
AggregateFunctionStaticState<StateType
}
Status Finalize(const StateType& src, Datum* output) const override {
- *output = (src.count > 0) ? Datum(Scalar(src.sum)) : Datum();
+ auto boxed = src.AsScalar();
+ if (src.count == 0) {
+ // TODO(wesm): Currently null, but fix this
+ boxed->is_valid = false;
+ }
+ *output = boxed;
return Status::OK();
}
@@ -185,7 +196,7 @@ Status Sum(FunctionContext* ctx, const Datum& value, Datum*
out) {
}
Status Sum(FunctionContext* ctx, const Array& array, Datum* out) {
- return Sum(ctx, Datum(array.data()), out);
+ return Sum(ctx, array.data(), out);
}
} // namespace compute
diff --git a/cpp/src/arrow/compute/kernels/sum.h
b/cpp/src/arrow/compute/kernels/sum.h
index 2e2ca3c..88da2ac 100644
--- a/cpp/src/arrow/compute/kernels/sum.h
+++ b/cpp/src/arrow/compute/kernels/sum.h
@@ -22,6 +22,8 @@
#include <type_traits>
#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
#include "arrow/util/visibility.h"
namespace arrow {
@@ -34,25 +36,22 @@ namespace compute {
// Find the largest compatible primitive type for a primitive type.
template <typename I, typename Enable = void>
struct FindAccumulatorType {
- using Type = double;
+ using Type = DoubleType;
};
template <typename I>
-struct FindAccumulatorType<I, typename
std::enable_if<std::is_integral<I>::value &&
-
std::is_signed<I>::value>::type> {
- using Type = int64_t;
+struct FindAccumulatorType<I, typename
std::enable_if<IsSignedInt<I>::value>::type> {
+ using Type = Int64Type;
};
template <typename I>
-struct FindAccumulatorType<I, typename
std::enable_if<std::is_integral<I>::value &&
-
std::is_unsigned<I>::value>::type> {
- using Type = uint64_t;
+struct FindAccumulatorType<I, typename
std::enable_if<IsUnsignedInt<I>::value>::type> {
+ using Type = UInt64Type;
};
template <typename I>
-struct FindAccumulatorType<
- I, typename std::enable_if<std::is_floating_point<I>::value>::type> {
- using Type = double;
+struct FindAccumulatorType<I, typename
std::enable_if<IsFloatingPoint<I>::value>::type> {
+ using Type = DoubleType;
};
struct Datum;
diff --git a/cpp/src/arrow/scalar-test.cc b/cpp/src/arrow/scalar-test.cc
new file mode 100644
index 0000000..580f480
--- /dev/null
+++ b/cpp/src/arrow/scalar-test.cc
@@ -0,0 +1,189 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arrow/buffer.h"
+#include "arrow/memory_pool.h"
+#include "arrow/scalar.h"
+#include "arrow/status.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/type_traits.h"
+
+namespace arrow {
+
+TEST(TestNullScalar, Basics) {
+  // A default-constructed NullScalar carries the null() type and is not valid
+  NullScalar null_scalar;
+  ASSERT_TRUE(null_scalar.type->Equals(*null()));
+  ASSERT_FALSE(null_scalar.is_valid);
+}
+
+// Typed-test fixture; it holds no state and is instantiated once for each
+// entry of NumericArrowTypes
+template <typename T>
+class TestNumericScalar : public ::testing::Test {
+ public:
+  TestNumericScalar() = default;
+};
+
+TYPED_TEST_CASE(TestNumericScalar, NumericArrowTypes);
+
+TYPED_TEST(TestNumericScalar, Basics) {
+  using CType = typename TypeParam::c_type;
+  using ScalarType = typename TypeTraits<TypeParam>::ScalarType;
+
+  // A freshly boxed value is valid and holds the value it was given
+  CType initial = static_cast<CType>(1);
+  auto boxed = std::make_shared<ScalarType>(initial);
+  ASSERT_EQ(initial, boxed->value);
+  ASSERT_TRUE(boxed->is_valid);
+
+  // The scalar's type equals the singleton DataType of the Arrow type
+  auto singleton_type = TypeTraits<TypeParam>::type_singleton();
+  ASSERT_TRUE(boxed->type->Equals(*singleton_type));
+
+  // The value member can be overwritten in place
+  CType replacement = static_cast<CType>(2);
+  boxed->value = replacement;
+  ASSERT_EQ(replacement, boxed->value);
+
+  // Passing is_valid = false produces a null scalar
+  ScalarType null_scalar(0, false);
+  ASSERT_FALSE(null_scalar.is_valid);
+}
+
+TEST(TestBinaryScalar, Basics) {
+  std::string contents = "test data";
+  auto buffer = std::make_shared<Buffer>(contents);
+
+  // Binary scalar wrapping the buffer
+  BinaryScalar binary_scalar(buffer);
+  ASSERT_TRUE(binary_scalar.value->Equals(*buffer));
+  ASSERT_TRUE(binary_scalar.is_valid);
+  ASSERT_TRUE(binary_scalar.type->Equals(*binary()));
+
+  // Destroying a scalar through a base-class pointer must release its
+  // reference to the buffer: the use count returns to what it was before
+  const auto count_before = buffer.use_count();
+  {
+    std::shared_ptr<Scalar> through_base = std::make_shared<BinaryScalar>(buffer);
+    through_base.reset();
+  }
+  ASSERT_EQ(count_before, buffer.use_count());
+
+  BinaryScalar binary_null(nullptr, false);
+  ASSERT_FALSE(binary_null.is_valid);
+
+  // String scalar over the same buffer reports the utf8() type
+  StringScalar string_scalar(buffer);
+  ASSERT_TRUE(string_scalar.value->Equals(*buffer));
+  ASSERT_TRUE(string_scalar.is_valid);
+  ASSERT_TRUE(string_scalar.type->Equals(*utf8()));
+
+  StringScalar string_null(nullptr, false);
+  ASSERT_FALSE(string_null.is_valid);
+}
+
+TEST(TestFixedSizeBinaryScalar, Basics) {
+  std::string contents = "test data";
+  auto buffer = std::make_shared<Buffer>(contents);
+
+  // The payload is nine bytes long, matching fixed_size_binary(9)
+  auto expected_type = fixed_size_binary(9);
+
+  FixedSizeBinaryScalar scalar(buffer, expected_type);
+  ASSERT_TRUE(scalar.value->Equals(*buffer));
+  ASSERT_TRUE(scalar.is_valid);
+  ASSERT_TRUE(scalar.type->Equals(*expected_type));
+}
+
+TEST(TestDateScalars, Basics) {
+  // Date32: int32_t payload, type date32()
+  int32_t raw32 = 1;
+  Date32Scalar valid32(raw32);
+  Date32Scalar null32(raw32, false);
+  ASSERT_EQ(raw32, valid32.value);
+  ASSERT_TRUE(valid32.type->Equals(*date32()));
+  ASSERT_TRUE(valid32.is_valid);
+  ASSERT_FALSE(null32.is_valid);
+
+  // Date64: int64_t payload, type date64()
+  int64_t raw64 = 2;
+  Date64Scalar valid64(raw64);
+  Date64Scalar null64(raw64, false);
+  ASSERT_EQ(raw64, valid64.value);
+  ASSERT_TRUE(valid64.type->Equals(*date64()));
+  ASSERT_TRUE(valid64.is_valid);
+  ASSERT_FALSE(null64.is_valid);
+}
+
+TEST(TestTimeScalars, Basics) {
+  auto milli32 = time32(TimeUnit::MILLI);
+  auto second32 = time32(TimeUnit::SECOND);
+  auto micro64 = time64(TimeUnit::MICRO);
+  auto nano64 = time64(TimeUnit::NANO);
+
+  // Time32 scalars keep the value and exactly the type they were given
+  int32_t raw32 = 1;
+  Time32Scalar valid32(raw32, milli32);
+  Time32Scalar null32(raw32, second32, false);
+  ASSERT_EQ(raw32, valid32.value);
+  ASSERT_TRUE(valid32.type->Equals(*milli32));
+  ASSERT_TRUE(valid32.is_valid);
+  ASSERT_FALSE(null32.is_valid);
+  ASSERT_TRUE(null32.type->Equals(*second32));
+
+  // Same checks for Time64
+  int64_t raw64 = 2;
+  Time64Scalar valid64(raw64, micro64);
+  Time64Scalar null64(raw64, nano64, false);
+  ASSERT_EQ(raw64, valid64.value);
+  ASSERT_TRUE(valid64.type->Equals(*micro64));
+  ASSERT_TRUE(valid64.is_valid);
+  ASSERT_FALSE(null64.is_valid);
+  ASSERT_TRUE(null64.type->Equals(*nano64));
+}
+
+TEST(TestTimestampScalars, Basics) {
+  auto milli_type = timestamp(TimeUnit::MILLI);
+  auto second_type = timestamp(TimeUnit::SECOND);
+
+  int64_t first = 1;
+  int64_t second = 2;
+  TimestampScalar milli_scalar(first, milli_type);
+  TimestampScalar second_scalar(second, second_type);
+  TimestampScalar null_scalar(second, milli_type, false);
+
+  // A null scalar still stores the value it was constructed with
+  ASSERT_EQ(first, milli_scalar.value);
+  ASSERT_EQ(second, null_scalar.value);
+
+  // Each scalar reports the type passed to its constructor
+  ASSERT_TRUE(milli_scalar.type->Equals(*milli_type));
+  ASSERT_TRUE(second_scalar.type->Equals(*second_type));
+  ASSERT_TRUE(milli_scalar.is_valid);
+  ASSERT_FALSE(null_scalar.is_valid);
+  ASSERT_TRUE(null_scalar.type->Equals(*milli_type));
+}
+
+// TODO test HalfFloatScalar
+
+} // namespace arrow
diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc
new file mode 100644
index 0000000..a9d7c5d
--- /dev/null
+++ b/cpp/src/arrow/scalar.cc
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/scalar.h"
+
+#include <memory>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+// Stores `value` with the caller-supplied `type`; DCHECKs that the type id is
+// Type::TIME32.
+Time32Scalar::Time32Scalar(int32_t value, const std::shared_ptr<DataType>& type,
+                           bool is_valid)
+    : Scalar{type, is_valid}, value(value) {
+  DCHECK_EQ(Type::TIME32, type->id());
+}
+
+// Stores `value` with the caller-supplied `type`; DCHECKs that the type id is
+// Type::TIME64.
+Time64Scalar::Time64Scalar(int64_t value, const std::shared_ptr<DataType>& type,
+                           bool is_valid)
+    : Scalar{type, is_valid}, value(value) {
+  DCHECK_EQ(Type::TIME64, type->id());
+}
+
+// Stores `value` with the caller-supplied `type`; DCHECKs that the type id is
+// Type::TIMESTAMP.
+TimestampScalar::TimestampScalar(int64_t value, const std::shared_ptr<DataType>& type,
+                                 bool is_valid)
+    : Scalar{type, is_valid}, value(value) {
+  DCHECK_EQ(Type::TIMESTAMP, type->id());
+}
+
+// Forwards to BinaryScalar and checks that the buffer length equals the byte
+// width declared by `type`.
+FixedSizeBinaryScalar::FixedSizeBinaryScalar(const std::shared_ptr<Buffer>& value,
+                                             const std::shared_ptr<DataType>& type,
+                                             bool is_valid)
+    : BinaryScalar(value, type, is_valid) {
+  // Binary scalars may be built with no buffer at all (e.g. a null scalar), so
+  // the width can only be compared when a buffer is present
+  if (value) {
+    DCHECK_EQ(checked_cast<const FixedSizeBinaryType&>(*type).byte_width(), value->size());
+  }
+}
+
+// Copies `value` into the scalar; `type` is stored as given and not checked here.
+Decimal128Scalar::Decimal128Scalar(const Decimal128& value,
+                                   const std::shared_ptr<DataType>& type, bool is_valid)
+    : Scalar{type, is_valid}, value(value) {}
+
+// Keeps a reference to `value` and records the explicitly supplied `type`.
+ListScalar::ListScalar(const std::shared_ptr<Array>& value,
+                       const std::shared_ptr<DataType>& type, bool is_valid)
+    : Scalar(type, is_valid), value{value} {}
+
+// Infers the scalar's type from the array of list elements. The scalar is a
+// list *of* those elements, so its type is list(value->type()), not the
+// element type itself. `value` must be non-null: its type is read
+// unconditionally.
+ListScalar::ListScalar(const std::shared_ptr<Array>& value, bool is_valid)
+    : ListScalar(value, list(value->type()), is_valid) {}
+
+} // namespace arrow
diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h
new file mode 100644
index 0000000..b90b26e
--- /dev/null
+++ b/cpp/src/arrow/scalar.h
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Object model for scalar (non-Array) values. Not intended for use with large
+// amounts of data
+//
+// NOTE: This API is experimental as of the 0.13 version and subject to change
+// without deprecation warnings
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/variant.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+
+/// \brief Common base of all scalars: a single, possibly null value of a given
+/// type, i.e. the contents of one array "slot"
+struct ARROW_EXPORT Scalar {
+  /// \brief The type of the scalar value
+  std::shared_ptr<DataType> type;
+
+  /// \brief True if the value is valid (not null), false if it is null
+  bool is_valid;
+
+  virtual ~Scalar() = default;
+
+ protected:
+  /// Only subclasses construct the base; they pass the type and validity
+  Scalar(const std::shared_ptr<DataType>& type, bool is_valid)
+      : type{type}, is_valid{is_valid} {}
+};
+
+/// \brief Scalar of NullType; it is constructed with is_valid set to false
+struct ARROW_EXPORT NullScalar : public Scalar {
+  NullScalar() : Scalar(null(), false) {}
+};
+
+/// \brief Scalar holding a single bool, typed as boolean()
+struct ARROW_EXPORT BooleanScalar : public Scalar {
+  bool value;
+
+  /// \param[in] value the boolean payload
+  /// \param[in] is_valid pass false to create a null scalar
+  explicit BooleanScalar(bool value, bool is_valid = true)
+      : Scalar(boolean(), is_valid), value{value} {}
+};
+
+/// \brief Scalar for a numeric Arrow type, storing one value of Type::c_type
+template <typename Type>
+struct NumericScalar : public Scalar {
+  using T = typename Type::c_type;
+  T value;
+
+  /// Uses TypeTraits<Type>::type_singleton() as the scalar's DataType
+  explicit NumericScalar(T value, bool is_valid = true)
+      : NumericScalar(value, TypeTraits<Type>::type_singleton(), is_valid) {}
+
+ protected:
+  /// For callers within the class hierarchy that supply the DataType themselves
+  explicit NumericScalar(T value, const std::shared_ptr<DataType>& type, bool is_valid)
+      : Scalar{type, is_valid}, value(value) {}
+};
+
+/// \brief Scalar whose value is a Buffer, typed as binary() unless a subclass
+/// supplies another type
+struct ARROW_EXPORT BinaryScalar : public Scalar {
+  std::shared_ptr<Buffer> value;
+
+  /// \param[in] value the buffer holding the data
+  /// \param[in] is_valid pass false to create a null scalar
+  explicit BinaryScalar(const std::shared_ptr<Buffer>& value, bool is_valid = true)
+      : BinaryScalar(value, binary(), is_valid) {}
+
+ protected:
+  /// For subclasses that store a buffer under a different DataType
+  BinaryScalar(const std::shared_ptr<Buffer>& value,
+               const std::shared_ptr<DataType>& type, bool is_valid = true)
+      : Scalar{type, is_valid}, value(value) {}
+};
+
+/// \brief Binary scalar with an explicitly supplied type; the constructor is
+/// defined in scalar.cc and casts `type` to FixedSizeBinaryType
+struct ARROW_EXPORT FixedSizeBinaryScalar : public BinaryScalar {
+  FixedSizeBinaryScalar(const std::shared_ptr<Buffer>& value,
+                        const std::shared_ptr<DataType>& type, bool is_valid = true);
+};
+
+/// \brief Binary scalar typed as utf8()
+struct ARROW_EXPORT StringScalar : public BinaryScalar {
+  explicit StringScalar(const std::shared_ptr<Buffer>& value, bool is_valid = true)
+      : BinaryScalar(value, utf8(), is_valid) {}
+};
+
+/// \brief Scalar for Date32Type. It adds nothing to NumericScalar<Date32Type>
+/// and re-exposes that class's constructors through the using-declaration
+class ARROW_EXPORT Date32Scalar : public NumericScalar<Date32Type> {
+ public:
+  using NumericScalar<Date32Type>::NumericScalar;
+};
+
+/// \brief Scalar for Date64Type. It adds nothing to NumericScalar<Date64Type>
+/// and re-exposes that class's constructors through the using-declaration
+class ARROW_EXPORT Date64Scalar : public NumericScalar<Date64Type> {
+ public:
+  using NumericScalar<Date64Type>::NumericScalar;
+};
+
+/// \brief Scalar holding an int32_t value. The DataType is always passed
+/// explicitly; the constructor (scalar.cc) DCHECKs that its id is Type::TIME32
+class ARROW_EXPORT Time32Scalar : public Scalar {
+ public:
+  int32_t value;
+  Time32Scalar(int32_t value, const std::shared_ptr<DataType>& type,
+               bool is_valid = true);
+};
+
+/// \brief Scalar holding an int64_t value. The DataType is always passed
+/// explicitly; the constructor (scalar.cc) DCHECKs that its id is Type::TIME64
+class ARROW_EXPORT Time64Scalar : public Scalar {
+ public:
+  int64_t value;
+  Time64Scalar(int64_t value, const std::shared_ptr<DataType>& type,
+               bool is_valid = true);
+};
+
+/// \brief Scalar holding an int64_t value. The DataType is always passed
+/// explicitly; the constructor (scalar.cc) DCHECKs that its id is
+/// Type::TIMESTAMP
+class ARROW_EXPORT TimestampScalar : public Scalar {
+ public:
+  int64_t value;
+  TimestampScalar(int64_t value, const std::shared_ptr<DataType>& type,
+                  bool is_valid = true);
+};
+
+/// \brief Scalar holding a Decimal128 value; the DataType is supplied by the
+/// caller
+struct ARROW_EXPORT Decimal128Scalar : public Scalar {
+  Decimal128 value;
+  Decimal128Scalar(const Decimal128& value, const std::shared_ptr<DataType>& type,
+                   bool is_valid = true);
+};
+
+/// \brief Scalar whose value is an Array (presumably the elements of a single
+/// list slot)
+struct ARROW_EXPORT ListScalar : public Scalar {
+  std::shared_ptr<Array> value;
+
+  /// Construct with an explicitly supplied type
+  ListScalar(const std::shared_ptr<Array>& value, const std::shared_ptr<DataType>& type,
+             bool is_valid = true);
+
+  /// Construct with a type derived from `value`, which must therefore be non-null
+  explicit ListScalar(const std::shared_ptr<Array>& value, bool is_valid = true);
+};
+
+/// \brief Scalar holding a vector of child scalars (presumably one per struct
+/// field)
+struct ARROW_EXPORT StructScalar : public Scalar {
+  std::vector<std::shared_ptr<Scalar>> value;
+
+  /// Scalar offers only a protected two-argument constructor, so without an
+  /// explicit constructor here a StructScalar could never be created
+  StructScalar(const std::vector<std::shared_ptr<Scalar>>& value,
+               const std::shared_ptr<DataType>& type, bool is_valid = true)
+      : Scalar{type, is_valid}, value(value) {}
+};
+
+} // namespace arrow
diff --git a/cpp/src/arrow/testing/gtest_util.h
b/cpp/src/arrow/testing/gtest_util.h
index 722cf25..8fe56ad 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -94,6 +94,13 @@
namespace arrow {
+// ----------------------------------------------------------------------
+// Useful testing::Types declarations
+
+typedef ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type,
Int8Type,
+ Int16Type, Int32Type, Int64Type, FloatType,
DoubleType>
+ NumericArrowTypes;
+
class ChunkedArray;
class Column;
class Table;
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index 2593a4f..3c9f9dd 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -45,38 +45,47 @@ class Schema;
class DictionaryType;
class DictionaryArray;
+class DictionaryScalar;  // NOTE(review): declared with 'class' while the scalars added below use 'struct' -- confirm it matches the definition's class-key (MSVC warns on mismatch)
class NullType;
class NullArray;
class NullBuilder;
+struct NullScalar;
class BooleanType;
class BooleanArray;
class BooleanBuilder;
+struct BooleanScalar;
class BinaryType;
class BinaryArray;
class BinaryBuilder;
+struct BinaryScalar;
class FixedSizeBinaryType;
class FixedSizeBinaryArray;
class FixedSizeBinaryBuilder;
+struct FixedSizeBinaryScalar;
class StringType;
class StringArray;
class StringBuilder;
+struct StringScalar;
class ListType;
class ListArray;
class ListBuilder;
+struct ListScalar;
class StructType;
class StructArray;
class StructBuilder;
+struct StructScalar;
class Decimal128Type;
class Decimal128Array;
class Decimal128Builder;
+struct Decimal128Scalar;
class UnionType;
class UnionArray;
@@ -90,10 +99,14 @@ class NumericBuilder;
template <typename TypeClass>
class NumericTensor;
+template <typename TypeClass>
+struct NumericScalar;  // forward declaration; aliased as KLASS##Scalar for each type by _NUMERIC_TYPE_DECL below
+
#define _NUMERIC_TYPE_DECL(KLASS) \
class KLASS##Type; \
using KLASS##Array = NumericArray<KLASS##Type>; \
using KLASS##Builder = NumericBuilder<KLASS##Type>; \
+ using KLASS##Scalar = NumericScalar<KLASS##Type>; \
using KLASS##Tensor = NumericTensor<KLASS##Type>;
_NUMERIC_TYPE_DECL(Int8)
@@ -113,25 +126,31 @@ _NUMERIC_TYPE_DECL(Double)
class Date64Type;
using Date64Array = NumericArray<Date64Type>;
using Date64Builder = NumericBuilder<Date64Type>;
+class Date64Scalar;  // date/time scalars are forward-declared as distinct classes, not NumericScalar aliases; NOTE(review): confirm 'class' matches each definition's class-key
class Date32Type;
using Date32Array = NumericArray<Date32Type>;
using Date32Builder = NumericBuilder<Date32Type>;
+class Date32Scalar;
class Time32Type;
using Time32Array = NumericArray<Time32Type>;
using Time32Builder = NumericBuilder<Time32Type>;
+class Time32Scalar;
class Time64Type;
using Time64Array = NumericArray<Time64Type>;
using Time64Builder = NumericBuilder<Time64Type>;
+class Time64Scalar;
class TimestampType;
using TimestampArray = NumericArray<TimestampType>;
using TimestampBuilder = NumericBuilder<TimestampType>;
+class TimestampScalar;
class IntervalType;
using IntervalArray = NumericArray<IntervalType>;
+class IntervalScalar;  // declared even though no IntervalBuilder alias appears in this hunk
// ----------------------------------------------------------------------
// (parameter-free) Factory functions
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index fd1d52a..51cf5ec 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -42,6 +42,7 @@ template <>
struct TypeTraits<NullType> {
using ArrayType = NullArray;
using BuilderType = NullBuilder;
+ using ScalarType = NullScalar;
constexpr static bool is_parameter_free = false;
};
@@ -49,6 +50,7 @@ template <>
struct TypeTraits<BooleanType> {
using ArrayType = BooleanArray;
using BuilderType = BooleanBuilder;
+ using ScalarType = BooleanScalar;
using CType = bool;
static constexpr int64_t bytes_required(int64_t elements) {
@@ -64,11 +66,12 @@ struct CTypeTraits<bool> : public TypeTraits<BooleanType> {
};
#define PRIMITIVE_TYPE_TRAITS_DEF_(CType_, ArrowType_, ArrowArrayType,
ArrowBuilderType, \
- ArrowTensorType, SingletonFn)
\
+ ArrowScalarType, ArrowTensorType,
SingletonFn) \
template <>
\
struct TypeTraits<ArrowType_> {
\
using ArrayType = ArrowArrayType;
\
using BuilderType = ArrowBuilderType;
\
+ using ScalarType = ArrowScalarType;
\
using TensorType = ArrowTensorType;
\
using CType = CType_;
\
static constexpr int64_t bytes_required(int64_t elements) {
\
@@ -86,7 +89,8 @@ struct CTypeTraits<bool> : public TypeTraits<BooleanType> {
#define PRIMITIVE_TYPE_TRAITS_DEF(CType, ArrowShort, SingletonFn) \
PRIMITIVE_TYPE_TRAITS_DEF_( \
CType, ARROW_CONCAT(ArrowShort, Type), ARROW_CONCAT(ArrowShort, Array), \
- ARROW_CONCAT(ArrowShort, Builder), ARROW_CONCAT(ArrowShort, Tensor),
SingletonFn)
+ ARROW_CONCAT(ArrowShort, Builder), ARROW_CONCAT(ArrowShort, Scalar), \
+ ARROW_CONCAT(ArrowShort, Tensor), SingletonFn)
PRIMITIVE_TYPE_TRAITS_DEF(uint8_t, UInt8, uint8)
PRIMITIVE_TYPE_TRAITS_DEF(int8_t, Int8, int8)
@@ -106,6 +110,7 @@ template <>
struct TypeTraits<Date64Type> {
using ArrayType = Date64Array;
using BuilderType = Date64Builder;
+ using ScalarType = Date64Scalar;
static constexpr int64_t bytes_required(int64_t elements) {
return elements * sizeof(int64_t);
@@ -118,6 +123,7 @@ template <>
struct TypeTraits<Date32Type> {
using ArrayType = Date32Array;
using BuilderType = Date32Builder;
+ using ScalarType = Date32Scalar;
static constexpr int64_t bytes_required(int64_t elements) {
return elements * sizeof(int32_t);
@@ -130,6 +136,7 @@ template <>
struct TypeTraits<TimestampType> {
using ArrayType = TimestampArray;
using BuilderType = TimestampBuilder;
+ using ScalarType = TimestampScalar;
static constexpr int64_t bytes_required(int64_t elements) {
return elements * sizeof(int64_t);
@@ -141,6 +148,7 @@ template <>
struct TypeTraits<Time32Type> {
using ArrayType = Time32Array;
using BuilderType = Time32Builder;
+ using ScalarType = Time32Scalar;
static constexpr int64_t bytes_required(int64_t elements) {
return elements * sizeof(int32_t);
@@ -152,6 +160,7 @@ template <>
struct TypeTraits<Time64Type> {
using ArrayType = Time64Array;
using BuilderType = Time64Builder;
+ using ScalarType = Time64Scalar;
static constexpr int64_t bytes_required(int64_t elements) {
return elements * sizeof(int64_t);
@@ -163,6 +172,7 @@ template <>
struct TypeTraits<HalfFloatType> {
using ArrayType = HalfFloatArray;
using BuilderType = HalfFloatBuilder;
+ using ScalarType = HalfFloatScalar;
using TensorType = HalfFloatTensor;
static constexpr int64_t bytes_required(int64_t elements) {
@@ -176,6 +186,24 @@ template <>
struct TypeTraits<Decimal128Type> {
using ArrayType = Decimal128Array;
using BuilderType = Decimal128Builder;
+ using ScalarType = Decimal128Scalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
+struct TypeTraits<BinaryType> {  // maps BinaryType to its array, builder and scalar classes
+ using ArrayType = BinaryArray;
+ using BuilderType = BinaryBuilder;
+ using ScalarType = BinaryScalar;
+ constexpr static bool is_parameter_free = true;  // paired with type_singleton() below
+ static inline std::shared_ptr<DataType> type_singleton() { return binary(); }  // returns the binary() factory result
+};
+
+template <>
+struct TypeTraits<FixedSizeBinaryType> {  // maps FixedSizeBinaryType to its array, builder and scalar classes
+ using ArrayType = FixedSizeBinaryArray;
+ using BuilderType = FixedSizeBinaryBuilder;
+ using ScalarType = FixedSizeBinaryScalar;
constexpr static bool is_parameter_free = false;  // no type_singleton() is provided for this type
};
@@ -183,6 +211,7 @@ template <>
struct TypeTraits<StringType> {
using ArrayType = StringArray;
using BuilderType = StringBuilder;
+ using ScalarType = StringScalar;
constexpr static bool is_parameter_free = true;
static inline std::shared_ptr<DataType> type_singleton() { return utf8(); }
};
@@ -198,24 +227,10 @@ struct CTypeTraits<char*> : public TypeTraits<StringType>
{
};
template <>
-struct TypeTraits<BinaryType> {
- using ArrayType = BinaryArray;
- using BuilderType = BinaryBuilder;
- constexpr static bool is_parameter_free = true;
- static inline std::shared_ptr<DataType> type_singleton() { return binary(); }
-};
-
-template <>
-struct TypeTraits<FixedSizeBinaryType> {
- using ArrayType = FixedSizeBinaryArray;
- using BuilderType = FixedSizeBinaryBuilder;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
struct TypeTraits<ListType> {
using ArrayType = ListArray;
using BuilderType = ListBuilder;
+ using ScalarType = ListScalar;
constexpr static bool is_parameter_free = false;
};
@@ -232,6 +247,7 @@ template <>
struct TypeTraits<StructType> {
using ArrayType = StructArray;
using BuilderType = StructBuilder;
+ using ScalarType = StructScalar;
constexpr static bool is_parameter_free = false;
};
@@ -244,6 +260,8 @@ struct TypeTraits<UnionType> {
template <>
struct TypeTraits<DictionaryType> {  // only ArrayType is mapped; no BuilderType or ScalarType alias
using ArrayType = DictionaryArray;
+ // TODO(wesm): Not sure what to do about this
+ // using ScalarType = DictionaryScalar;
constexpr static bool is_parameter_free = false;
};
diff --git a/run-cmake-format.py b/run-cmake-format.py
index 03747db..ea356a9 100755
--- a/run-cmake-format.py
+++ b/run-cmake-format.py
@@ -40,7 +40,8 @@ def find_cmake_files():
def run_cmake_format(paths):
- # cmake-format is fast enough that running in parallel doesn't seem
necessary
+ # cmake-format is fast enough that running in parallel doesn't seem
+ # necessary
cmd = ['cmake-format', '--in-place'] + paths
try:
subprocess.run(cmd, check=True)