bkietz commented on a change in pull request #11384:
URL: https://github.com/apache/arrow/pull/11384#discussion_r741160879



##########
File path: cpp/cmake_modules/BuildUtils.cmake
##########
@@ -723,22 +724,27 @@ function(ADD_TEST_CASE REL_TEST_NAME)
     add_dependencies(${TEST_NAME} ${ARG_EXTRA_DEPENDENCIES})
   endif()
 
+  if(ARG_ENVIRONMENT)
+    message(STATUS "WTF ${ARG_ENVIRONMENT}")

Review comment:
       good catch, will remove
   

##########
File path: cpp/src/arrow/compute/exec/CMakeLists.txt
##########
@@ -31,3 +31,11 @@ add_arrow_compute_test(union_node_test PREFIX 
"arrow-compute")
 add_arrow_compute_test(util_test PREFIX "arrow-compute")
 
 add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute")
+
+add_arrow_compute_test(ir_test
+                       PREFIX
+                       "arrow-compute"
+                       EXTRA_LINK_LIBS
+                       ${GFLAGS_LIBRARIES}
+                       TEST_ARGUMENTS
+                       
"-computeir_dir=${CMAKE_SOURCE_DIR}/../experimental/computeir")

Review comment:
       Gflags accepts this but I'll change the invocation to look like [gnu 
style](https://www.gnu.org/prep/standards/html_node/Command_002dLine-Interfaces.html)

##########
File path: cpp/src/arrow/compute/exec/exec_plan.h
##########
@@ -351,13 +351,28 @@ struct ARROW_EXPORT Declaration {
         options{std::make_shared<Options>(std::move(options))},
         label{this->factory_name} {}
 
+  template <typename Options>
+  Declaration(std::string factory_name, std::vector<Input> inputs, Options 
options,
+              std::string label)
+      : factory_name{std::move(factory_name)},
+        inputs{std::move(inputs)},
+        options{std::make_shared<Options>(std::move(options))},
+        label{std::move(label)} {}
+
   template <typename Options>
   Declaration(std::string factory_name, Options options)
       : factory_name{std::move(factory_name)},
         inputs{},
         options{std::make_shared<Options>(std::move(options))},
         label{this->factory_name} {}
 
+  template <typename Options>
+  Declaration(std::string factory_name, Options options, std::string label)
+      : factory_name{std::move(factory_name)},
+        inputs{},
+        options{std::make_shared<Options>(std::move(options))},
+        label{std::move(label)} {}

Review comment:
       we can use them if you prefer

##########
File path: cpp/src/arrow/type.h
##########
@@ -188,6 +188,9 @@ class ARROW_EXPORT DataType : public 
detail::Fingerprintable {
 ARROW_EXPORT
 std::ostream& operator<<(std::ostream& os, const DataType& type);
 
+inline bool operator==(const DataType& l, const DataType& r) { return 
l.Equals(r); }

Review comment:
       :+1: 

##########
File path: cpp/src/arrow/util/io_util.cc
##########
@@ -578,6 +607,19 @@ Result<std::vector<PlatformFilename>> ListDir(const 
PlatformFilename& dir_path)
 
 #endif
 
+Status SetWorkingDir(const PlatformFilename& dir_path) {

Review comment:
       It can be removed, will do

##########
File path: cpp/src/arrow/scalar.h
##########
@@ -302,7 +302,7 @@ struct TemporalScalar : internal::PrimitiveScalar<T> {
   using internal::PrimitiveScalar<T>::PrimitiveScalar;
   using ValueType = typename TemporalScalar<T>::ValueType;
 
-  explicit TemporalScalar(ValueType value, std::shared_ptr<DataType> type)
+  TemporalScalar(ValueType value, std::shared_ptr<DataType> type)

Review comment:
       If the constructor could accept a single argument, `explicit` would 
allow implicit conversion to `TemporalScalar`. Since the constructor requires 
two arguments the keyword is fully redundant (which clang-tidy nagged me about 
when I opened the file, prompting the edit)

##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -51,6 +51,19 @@ struct SortField {
   SortOrder order;
 };
 
+Status CheckNonNested(const FieldRef& ref) {
+  if (ref.IsNested()) {
+    return Status::KeyError("Nested keys not supported for SortKeys");

Review comment:
       `FieldRef` supports referencing fields of fields, for example 
`FieldRef::FromDotPath('.alpha.beta')` could be applied to the schema
   ```
   schema({
     field("alpha", struct_({
       field("beta", int32()),
     })),
   })
   ```
   to acquire the inner field. Supporting that here would require refactoring 
`vector_sort.cc` to not assume that field references can be expressed as a 
single `int field_index` and instead support a path of field indices. I think 
that's out of scope for this PR

##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -51,6 +51,19 @@ struct SortField {
   SortOrder order;
 };
 
+Status CheckNonNested(const FieldRef& ref) {
+  if (ref.IsNested()) {
+    return Status::KeyError("Nested keys not supported for SortKeys");

Review comment:
       `FieldRef` supports referencing fields of fields, for example 
`FieldRef::FromDotPath('.alpha.beta')` could be applied to the schema
   ```c++
   schema({
     field("alpha", struct_({
       field("beta", int32()),
     })),
   })
   ```
   to acquire the inner field. Supporting that here would require refactoring 
`vector_sort.cc` to not assume that field references can be expressed as a 
single `int field_index` and instead support a path of field indices. I think 
that's out of scope for this PR

##########
File path: cpp/src/arrow/ipc/metadata_internal.h
##########
@@ -125,38 +125,49 @@ inline std::string StringFromFlatbuffers(const 
flatbuffers::String* s) {
 // dictionary-encoded fields to a DictionaryMemo instance. May be
 // expensive for very large schemas if you are only interested in a
 // few fields
+ARROW_EXPORT

Review comment:
       Correct, they are used in `ir_consumer.cc` now

##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -91,6 +104,18 @@ Result<std::vector<ResolvedSortKey>> ResolveSortKeys(
       });
 }
 
+std::shared_ptr<ChunkedArray> GetTableColumn(const Table& table, const 
FieldRef& ref) {

Review comment:
       will do
   

##########
File path: cpp/src/arrow/compute/api_vector.h
##########
@@ -98,17 +98,17 @@ enum class NullPlacement {
 /// \brief One sort key for PartitionNthIndices (TODO) and SortIndices
 class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> {
  public:
-  explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending)
-      : name(std::move(name)), order(order) {}
+  explicit SortKey(FieldRef target, SortOrder order = SortOrder::Ascending)
+      : target(std::move(target)), order(order) {}
 
   using util::EqualityComparable<SortKey>::Equals;
   using util::EqualityComparable<SortKey>::operator==;
   using util::EqualityComparable<SortKey>::operator!=;
   bool Equals(const SortKey& other) const;
   std::string ToString() const;
 
-  /// The name of the sort column.
-  std::string name;
+  /// A FieldRef targetting the sort column.
+  FieldRef target;

Review comment:
       This API is explicitly experimental, so I don't think we need be so 
conservative about changing it. It *would* be a `struct` (semantically it 
functions as one) except that it needs a virtual destructor. This compromise 
between `struct` and `class` is used for a number of `class /.*Options` and 
*is* a deviation from the style guide. I don't think replacing the public 
fields with full classy access boilerplate is in scope for this PR, but I can 
write a follow up JIRA if you like

##########
File path: cpp/src/arrow/compute/api_vector.h
##########
@@ -98,17 +98,17 @@ enum class NullPlacement {
 /// \brief One sort key for PartitionNthIndices (TODO) and SortIndices
 class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> {
  public:
-  explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending)
-      : name(std::move(name)), order(order) {}
+  explicit SortKey(FieldRef target, SortOrder order = SortOrder::Ascending)
+      : target(std::move(target)), order(order) {}
 
   using util::EqualityComparable<SortKey>::Equals;
   using util::EqualityComparable<SortKey>::operator==;
   using util::EqualityComparable<SortKey>::operator!=;
   bool Equals(const SortKey& other) const;
   std::string ToString() const;
 
-  /// The name of the sort column.
-  std::string name;
+  /// A FieldRef targetting the sort column.
+  FieldRef target;

Review comment:
       This API is explicitly experimental, so I don't think we need be so 
conservative about changing it. It *would* be a `struct` (semantically it 
functions as one) except that it needs a virtual destructor. This compromise 
between `struct` and `class` is used for a number of `class /.*Options` and 
*is* a deviation from the style guide, but facilitates a more terse 
specification of options structs. I don't think replacing the public fields 
with full classy access boilerplate is in scope for this PR, but I can write a 
follow up JIRA if you like

##########
File path: cpp/src/arrow/util/io_util.cc
##########
@@ -578,6 +607,19 @@ Result<std::vector<PlatformFilename>> ListDir(const 
PlatformFilename& dir_path)
 
 #endif
 
+Status SetWorkingDir(const PlatformFilename& dir_path) {

Review comment:
       FTR, I used this because flatc always outputs the binary representation 
to the current working directory (it will not accept the `-o` option) and it 
seemed neater to ensure we were always writing that to the temporary directory 
we allocated

##########
File path: cpp/src/arrow/compute/exec/test_util.cc
##########
@@ -235,5 +239,181 @@ void AssertExecBatchesEqual(const 
std::shared_ptr<Schema>& schema,
   AssertTablesEqual(exp_tab, act_tab);
 }
 
+template <typename T>
+static const T& OptionsAs(const ExecNodeOptions& opts) {
+  const auto& ptr = dynamic_cast<const T&>(opts);

Review comment:
       checked_cast should be fine here




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to