bkietz commented on a change in pull request #11384: URL: https://github.com/apache/arrow/pull/11384#discussion_r741160879
########## File path: cpp/cmake_modules/BuildUtils.cmake ########## @@ -723,22 +724,27 @@ function(ADD_TEST_CASE REL_TEST_NAME) add_dependencies(${TEST_NAME} ${ARG_EXTRA_DEPENDENCIES}) endif() + if(ARG_ENVIRONMENT) + message(STATUS "WTF ${ARG_ENVIRONMENT}") Review comment: good catch, will remove ########## File path: cpp/src/arrow/compute/exec/CMakeLists.txt ########## @@ -31,3 +31,11 @@ add_arrow_compute_test(union_node_test PREFIX "arrow-compute") add_arrow_compute_test(util_test PREFIX "arrow-compute") add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute") + +add_arrow_compute_test(ir_test + PREFIX + "arrow-compute" + EXTRA_LINK_LIBS + ${GFLAGS_LIBRARIES} + TEST_ARGUMENTS + "-computeir_dir=${CMAKE_SOURCE_DIR}/../experimental/computeir") Review comment: Gflags accepts this but I'll change the invocation to look like [gnu style](https://www.gnu.org/prep/standards/html_node/Command_002dLine-Interfaces.html) ########## File path: cpp/src/arrow/compute/exec/exec_plan.h ########## @@ -351,13 +351,28 @@ struct ARROW_EXPORT Declaration { options{std::make_shared<Options>(std::move(options))}, label{this->factory_name} {} + template <typename Options> + Declaration(std::string factory_name, std::vector<Input> inputs, Options options, + std::string label) + : factory_name{std::move(factory_name)}, + inputs{std::move(inputs)}, + options{std::make_shared<Options>(std::move(options))}, + label{std::move(label)} {} + template <typename Options> Declaration(std::string factory_name, Options options) : factory_name{std::move(factory_name)}, inputs{}, options{std::make_shared<Options>(std::move(options))}, label{this->factory_name} {} + template <typename Options> + Declaration(std::string factory_name, Options options, std::string label) + : factory_name{std::move(factory_name)}, + inputs{}, + options{std::make_shared<Options>(std::move(options))}, + label{std::move(label)} {} Review comment: we can use them if you prefer ########## File path: cpp/src/arrow/type.h ########## @@ -188,6 +188,9 @@ class ARROW_EXPORT DataType : public detail::Fingerprintable { ARROW_EXPORT std::ostream& operator<<(std::ostream& os, const DataType& type); +inline bool operator==(const DataType& l, const DataType& r) { return l.Equals(r); } Review comment: :+1: ########## File path: cpp/src/arrow/util/io_util.cc ########## @@ -578,6 +607,19 @@ Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path) #endif +Status SetWorkingDir(const PlatformFilename& dir_path) { Review comment: It can be removed, will do ########## File path: cpp/src/arrow/scalar.h ########## @@ -302,7 +302,7 @@ struct TemporalScalar : internal::PrimitiveScalar<T> { using internal::PrimitiveScalar<T>::PrimitiveScalar; using ValueType = typename TemporalScalar<T>::ValueType; - explicit TemporalScalar(ValueType value, std::shared_ptr<DataType> type) + TemporalScalar(ValueType value, std::shared_ptr<DataType> type) Review comment: If the constructor could accept a single argument, `explicit` would allow implicit conversion to `TemporalScalar`. Since the constructor requires two arguments the keyword is fully redundant (which clang-tidy nagged me about when I opened the file, prompting the edit) ########## File path: cpp/src/arrow/compute/kernels/vector_sort.cc ########## @@ -51,6 +51,19 @@ struct SortField { SortOrder order; }; +Status CheckNonNested(const FieldRef& ref) { + if (ref.IsNested()) { + return Status::KeyError("Nested keys not supported for SortKeys"); Review comment: `FieldRef` supports referencing fields of fields, for example `FieldRef::FromDotPath('.alpha.beta')` could be applied to the schema ``` schema({ field("alpha", struct_({ field("beta", int32()), })), }) ``` to acquire the inner field. Supporting that here would require refactoring `vector_sort.cc` to not assume that field references can be expressed as a single `int field_index` and instead support a path of field indices. I think that's out of scope for this PR ########## File path: cpp/src/arrow/compute/kernels/vector_sort.cc ########## @@ -51,6 +51,19 @@ struct SortField { SortOrder order; }; +Status CheckNonNested(const FieldRef& ref) { + if (ref.IsNested()) { + return Status::KeyError("Nested keys not supported for SortKeys"); Review comment: `FieldRef` supports referencing fields of fields, for example `FieldRef::FromDotPath('.alpha.beta')` could be applied to the schema ```c++ schema({ field("alpha", struct_({ field("beta", int32()), })), }) ``` to acquire the inner field. Supporting that here would require refactoring `vector_sort.cc` to not assume that field references can be expressed as a single `int field_index` and instead support a path of field indices. I think that's out of scope for this PR ########## File path: cpp/src/arrow/ipc/metadata_internal.h ########## @@ -125,38 +125,49 @@ inline std::string StringFromFlatbuffers(const flatbuffers::String* s) { // dictionary-encoded fields to a DictionaryMemo instance. May be // expensive for very large schemas if you are only interested in a // few fields +ARROW_EXPORT Review comment: Correct, they are used in `ir_consumer.cc` now ########## File path: cpp/src/arrow/compute/kernels/vector_sort.cc ########## @@ -91,6 +104,18 @@ Result<std::vector<ResolvedSortKey>> ResolveSortKeys( }); } +std::shared_ptr<ChunkedArray> GetTableColumn(const Table& table, const FieldRef& ref) { Review comment: will do ########## File path: cpp/src/arrow/compute/api_vector.h ########## @@ -98,17 +98,17 @@ enum class NullPlacement { /// \brief One sort key for PartitionNthIndices (TODO) and SortIndices class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> { public: - explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending) - : name(std::move(name)), order(order) {} + explicit SortKey(FieldRef target, SortOrder order = SortOrder::Ascending) + : target(std::move(target)), order(order) {} using util::EqualityComparable<SortKey>::Equals; using util::EqualityComparable<SortKey>::operator==; using util::EqualityComparable<SortKey>::operator!=; bool Equals(const SortKey& other) const; std::string ToString() const; - /// The name of the sort column. - std::string name; + /// A FieldRef targetting the sort column. + FieldRef target; Review comment: This API is explicitly experimental, so I don't think we need be so conservative about changing it. It *would* be a `struct` (semantically it functions as one) except that it needs a virtual destructor. This compromise between `struct` and `class` is used for a number of `class /.*Options` and *is* a deviation from the style guide. I don't think replacing the public fields with full classy access boilerplate is in scope for this PR, but I can write a follow up JIRA if you like ########## File path: cpp/src/arrow/compute/api_vector.h ########## @@ -98,17 +98,17 @@ enum class NullPlacement { /// \brief One sort key for PartitionNthIndices (TODO) and SortIndices class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> { public: - explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending) - : name(std::move(name)), order(order) {} + explicit SortKey(FieldRef target, SortOrder order = SortOrder::Ascending) + : target(std::move(target)), order(order) {} using util::EqualityComparable<SortKey>::Equals; using util::EqualityComparable<SortKey>::operator==; using util::EqualityComparable<SortKey>::operator!=; bool Equals(const SortKey& other) const; std::string ToString() const; - /// The name of the sort column. - std::string name; + /// A FieldRef targetting the sort column. + FieldRef target; Review comment: This API is explicitly experimental, so I don't think we need be so conservative about changing it. It *would* be a `struct` (semantically it functions as one) except that it needs a virtual destructor. This compromise between `struct` and `class` is used for a number of `class /.*Options` and *is* a deviation from the style guide, but facilitates a more terse specification of options structs. I don't think replacing the public fields with full classy access boilerplate is in scope for this PR, but I can write a follow up JIRA if you like ########## File path: cpp/src/arrow/util/io_util.cc ########## @@ -578,6 +607,19 @@ Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path) #endif +Status SetWorkingDir(const PlatformFilename& dir_path) { Review comment: FTR, I used this because flatc always outputs the binary representation to the current working directory (it will not accept the `-o` option) and it seemed neater to ensure we were always writing that to the temporary directory we allocated ########## File path: cpp/src/arrow/compute/exec/test_util.cc ########## @@ -235,5 +239,181 @@ void AssertExecBatchesEqual(const std::shared_ptr<Schema>& schema, AssertTablesEqual(exp_tab, act_tab); } +template <typename T> +static const T& OptionsAs(const ExecNodeOptions& opts) { + const auto& ptr = dynamic_cast<const T&>(opts); Review comment: checked_cast should be fine here -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org