[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-15 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r440565678



##
File path: cpp/src/arrow/testing/random.cc
##
@@ -84,7 +84,7 @@ std::shared_ptr RandomArrayGenerator::Boolean(int64_t 
size, double probab
 
   BufferVector buffers{2};
   // Need 2 distinct generators such that probabilities are not shared.
-  GenOpt value_gen(seed(), 0, 1, probability);
+  GenOpt value_gen(seed(), 0, 1, 1 - probability);

Review comment:
   @pitrou or @fsaintjacques could you please confirm this change





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-16 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r440918212



##
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##
@@ -0,0 +1,1758 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/extension_type.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+
+using internal::BinaryBitBlockCounter;
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::BitmapReader;
+using internal::CopyBitmap;
+using internal::GetArrayView;
+using internal::IndexBoundsCheck;
+using internal::OptionalBitBlockCounter;
+using internal::OptionalBitIndexer;
+
+namespace compute {
+namespace internal {
+
+int64_t GetFilterOutputSize(const ArrayData& filter,
+FilterOptions::NullSelectionBehavior 
null_selection) {
+  int64_t output_size = 0;
+  int64_t position = 0;
+  if (filter.GetNullCount() > 0) {
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+BinaryBitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset,
+  filter_is_valid, filter.offset, 
filter.length);
+if (null_selection == FilterOptions::EMIT_NULL) {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextOrNotWord();
+output_size += block.popcount;
+position += block.length;
+  }
+} else {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextAndWord();
+output_size += block.popcount;
+position += block.length;
+  }
+}
+  } else {
+// The filter has no nulls, so we plow through its data as fast as
+// possible.
+BitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset, 
filter.length);
+while (position < filter.length) {
+  BitBlockCount block = bit_counter.NextFourWords();
+  output_size += block.popcount;
+  position += block.length;
+}
+  }
+  return output_size;
+}
+
+template 
+Result> GetTakeIndicesImpl(
+const ArrayData& filter, FilterOptions::NullSelectionBehavior 
null_selection,
+MemoryPool* memory_pool) {
+  using T = typename IndexType::c_type;
+  typename TypeTraits::BuilderType builder(memory_pool);
+
+  const uint8_t* filter_data = filter.buffers[1]->data();
+  BitBlockCounter data_counter(filter_data, filter.offset, filter.length);
+
+  // The position relative to the start of the filter
+  T position = 0;
+
+  // The current position taking the filter offset into account
+  int64_t position_with_offset = filter.offset;
+  if (filter.GetNullCount() > 0) {
+// The filter has nulls, so we scan the validity bitmap and the filter data
+// bitmap together, branching on the null selection type.
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+
+// To count blocks whether filter_data[i] || !filter_is_valid[i]
+BinaryBitBlockCounter filter_counter(filter_data, filter.offset, 
filter_is_valid,
+ filter.offset, filter.length);
+if (null_selection == FilterOptions::DROP) {
+  while (position < filter.length) {
+BitBlockCount and_block = filter_counter.NextAndWord();
+RETURN_NOT_OK(builder.Reserve(and_block.popcount));
+if (and_block.IsFull()) {
+  // All the values are selected and non-null
+  for (int64_t i = 0; i < and_block.length; ++i) {
+builder.UnsafeAppend(position++);
+  }
+  position_with_offset += and_block.length;
+ 

[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-16 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r440975863



##
File path: cpp/src/arrow/util/bit_block_counter.h
##
@@ -33,11 +33,50 @@ class Buffer;
 
 namespace internal {
 
+namespace detail {
+
+// These templates are here to help with unit tests
+
+template 
+struct BitBlockAnd {
+  static T Call(T left, T right) { return left & right; }
+};
+
+template <>
+struct BitBlockAnd {
+  static bool Call(bool left, bool right) { return left && right; }
+};
+
+template 
+struct BitBlockOr {
+  static T Call(T left, T right) { return left | right; }
+};
+
+template <>
+struct BitBlockOr {
+  static bool Call(bool left, bool right) { return left || right; }
+};
+
+template 
+struct BitBlockOrNot {
+  static T Call(T left, T right) { return left | ~right; }
+};
+
+template <>
+struct BitBlockOrNot {
+  static bool Call(bool left, bool right) { return left || !right; }
+};
+
+}  // namespace detail
+
 /// \brief Return value from bit block counters: the total number of bits and
 /// the number of set bits.
 struct BitBlockCount {
   int16_t length;
   int16_t popcount;
+
+  bool IsEmpty() const { return this->popcount == 0; }

Review comment:
   Good point. I'll rename them





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441612459



##
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##
@@ -0,0 +1,1816 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/extension_type.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+
+using internal::BinaryBitBlockCounter;
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::BitmapReader;
+using internal::CopyBitmap;
+using internal::GetArrayView;
+using internal::IndexBoundsCheck;
+using internal::OptionalBitBlockCounter;
+using internal::OptionalBitIndexer;
+
+namespace compute {
+namespace internal {
+
+int64_t GetFilterOutputSize(const ArrayData& filter,
+FilterOptions::NullSelectionBehavior 
null_selection) {
+  int64_t output_size = 0;
+  int64_t position = 0;
+  if (filter.GetNullCount() > 0) {
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+BinaryBitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset,
+  filter_is_valid, filter.offset, 
filter.length);
+if (null_selection == FilterOptions::EMIT_NULL) {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextOrNotWord();
+output_size += block.popcount;
+position += block.length;
+  }
+} else {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextAndWord();
+output_size += block.popcount;
+position += block.length;
+  }
+}
+  } else {
+// The filter has no nulls, so we plow through its data as fast as
+// possible.
+BitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset, 
filter.length);
+while (position < filter.length) {
+  BitBlockCount block = bit_counter.NextFourWords();
+  output_size += block.popcount;
+  position += block.length;
+}
+  }
+  return output_size;
+}
+
+template 
+Result> GetTakeIndicesImpl(
+const ArrayData& filter, FilterOptions::NullSelectionBehavior 
null_selection,
+MemoryPool* memory_pool) {
+  using T = typename IndexType::c_type;
+  typename TypeTraits::BuilderType builder(memory_pool);
+
+  const uint8_t* filter_data = filter.buffers[1]->data();
+  BitBlockCounter data_counter(filter_data, filter.offset, filter.length);
+
+  // The position relative to the start of the filter
+  T position = 0;
+
+  // The current position taking the filter offset into account
+  int64_t position_with_offset = filter.offset;
+  if (filter.GetNullCount() > 0) {
+// The filter has nulls, so we scan the validity bitmap and the filter data
+// bitmap together, branching on the null selection type.
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+
+// To count blocks whether filter_data[i] || !filter_is_valid[i]
+BinaryBitBlockCounter filter_counter(filter_data, filter.offset, 
filter_is_valid,
+ filter.offset, filter.length);
+if (null_selection == FilterOptions::DROP) {
+  while (position < filter.length) {
+BitBlockCount and_block = filter_counter.NextAndWord();
+RETURN_NOT_OK(builder.Reserve(and_block.popcount));
+if (and_block.AllSet()) {
+  // All the values are selected and non-null
+  for (int64_t i = 0; i < and_block.length; ++i) {
+builder.UnsafeAppend(position++);
+  }
+  position_with_offset += and_block.length;
+ 

[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441613340



##
File path: cpp/src/arrow/compute/kernels/util_internal.cc
##
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/kernels/util_internal.h"
+
+#include 
+
+#include "arrow/array/data.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+namespace compute {
+namespace internal {
+
+const uint8_t* GetValidityBitmap(const ArrayData& data) {
+  const uint8_t* bitmap = nullptr;
+  if (data.buffers[0]) {
+bitmap = data.buffers[0]->data();
+  }
+  return bitmap;
+}
+
+int GetBitWidth(const DataType& type) {
+  return checked_cast(type).bit_width();
+}
+
+PrimitiveArg GetPrimitiveArg(const ArrayData& arr) {
+  PrimitiveArg arg;
+  arg.is_valid = GetValidityBitmap(arr);
+  arg.data = arr.buffers[1]->data();
+  arg.bit_width = GetBitWidth(*arr.type);
+  arg.offset = arr.offset;
+  arg.length = arr.length;
+  if (arg.bit_width > 1) {
+arg.data += arr.offset * arg.bit_width / 8;
+  }
+  arg.null_count = arr.GetNullCount();

Review comment:
   I think now that I've fixed the problem in 
https://github.com/apache/arrow/commit/37c9804784325502bf47b651252c39bdcf3e03a9 
I don't need to compute it here





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441613579



##
File path: cpp/src/arrow/compute/kernels/vector_selection_test.cc
##
@@ -0,0 +1,1637 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "arrow/compute/api.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/table.h"
+#include "arrow/testing/gtest_common.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/testing/util.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+using util::string_view;
+
+namespace compute {
+
+// --
+// Some random data generation helpers
+
+template 
+std::shared_ptr RandomNumeric(int64_t length, double null_probability,
+ random::RandomArrayGenerator* rng) {
+  return rng->Numeric(length, 0, 127, null_probability);
+}
+
+std::shared_ptr RandomBoolean(int64_t length, double null_probability,
+ random::RandomArrayGenerator* rng) {
+  return rng->Boolean(length, 0.5, null_probability);
+}
+
+std::shared_ptr RandomString(int64_t length, double null_probability,
+random::RandomArrayGenerator* rng) {
+  return rng->String(length, 0, 32, null_probability);
+}
+
+std::shared_ptr RandomLargeString(int64_t length, double 
null_probability,
+ random::RandomArrayGenerator* rng) {
+  return rng->LargeString(length, 0, 32, null_probability);
+}
+
+std::shared_ptr RandomFixedSizeBinary(int64_t length, double 
null_probability,
+ random::RandomArrayGenerator* 
rng) {
+  const int32_t value_size = 16;
+  int64_t data_nbytes = length * value_size;
+  std::shared_ptr data = *AllocateBuffer(data_nbytes);
+  random_bytes(data_nbytes, /*seed=*/0, data->mutable_data());
+  auto validity = rng->Boolean(length, 1 - null_probability);
+
+  // Assemble the data for a FixedSizeBinaryArray
+  auto values_data = 
std::make_shared(fixed_size_binary(value_size), length);
+  values_data->buffers = {validity->data()->buffers[1], data};
+  return MakeArray(values_data);
+}
+
+// --
+
+TEST(GetTakeIndices, Basics) {
+  auto CheckCase = [&](const std::string& filter_json, const std::string& 
indices_json,
+   FilterOptions::NullSelectionBehavior null_selection,
+   const std::shared_ptr& indices_type = 
uint16()) {
+auto filter = ArrayFromJSON(boolean(), filter_json);
+auto expected_indices = ArrayFromJSON(indices_type, indices_json);
+ASSERT_OK_AND_ASSIGN(auto indices,
+ internal::GetTakeIndices(*filter->data(), 
null_selection));
+AssertArraysEqual(*expected_indices, *MakeArray(indices), 
/*verbose=*/true);
+  };
+
+  // Drop null cases
+  CheckCase("[]", "[]", FilterOptions::DROP);
+  CheckCase("[null]", "[]", FilterOptions::DROP);
+  CheckCase("[null, false, true, true, false, true]", "[2, 3, 5]", 
FilterOptions::DROP);
+
+  // Emit null cases
+  CheckCase("[]", "[]", FilterOptions::EMIT_NULL);
+  CheckCase("[null]", "[null]", FilterOptions::EMIT_NULL);
+  CheckCase("[null, false, true, true]", "[null, 2, 3]", 
FilterOptions::EMIT_NULL);
+}
+
+// TODO: Add slicing
+
+template 
+void CheckGetTakeIndicesCase(const Array& untyped_filter) {
+  const auto& filter = checked_cast(untyped_filter);
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr drop_indices,
+   internal::GetTakeIndices(*filter.data(), 
FilterOptions::DROP));
+  // Verify DROP indices
+  {
+IndexArrayType indices(drop_indices);
+int64_t out_position = 0;
+for (int64_t i = 0; i < filter.length(); ++i) {
+  if (filter.IsValid(i)) {
+if (filter.Value(i)) {
+  ASSERT_EQ(indices.Value(out_position), i);
+  ++out_position;
+}
+  }
+}
+// Check that the end length agrees with the output of GetFilterOutputSi

[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441643717



##
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##
@@ -0,0 +1,1816 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/extension_type.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+
+using internal::BinaryBitBlockCounter;
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::BitmapReader;
+using internal::CopyBitmap;
+using internal::GetArrayView;
+using internal::IndexBoundsCheck;
+using internal::OptionalBitBlockCounter;
+using internal::OptionalBitIndexer;
+
+namespace compute {
+namespace internal {
+
+int64_t GetFilterOutputSize(const ArrayData& filter,
+FilterOptions::NullSelectionBehavior 
null_selection) {
+  int64_t output_size = 0;
+  int64_t position = 0;
+  if (filter.GetNullCount() > 0) {
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+BinaryBitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset,
+  filter_is_valid, filter.offset, 
filter.length);
+if (null_selection == FilterOptions::EMIT_NULL) {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextOrNotWord();
+output_size += block.popcount;
+position += block.length;
+  }
+} else {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextAndWord();
+output_size += block.popcount;
+position += block.length;
+  }
+}
+  } else {
+// The filter has no nulls, so we plow through its data as fast as
+// possible.
+BitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset, 
filter.length);
+while (position < filter.length) {
+  BitBlockCount block = bit_counter.NextFourWords();
+  output_size += block.popcount;
+  position += block.length;
+}
+  }
+  return output_size;
+}
+
+template 
+Result> GetTakeIndicesImpl(
+const ArrayData& filter, FilterOptions::NullSelectionBehavior 
null_selection,
+MemoryPool* memory_pool) {
+  using T = typename IndexType::c_type;
+  typename TypeTraits::BuilderType builder(memory_pool);
+
+  const uint8_t* filter_data = filter.buffers[1]->data();
+  BitBlockCounter data_counter(filter_data, filter.offset, filter.length);
+
+  // The position relative to the start of the filter
+  T position = 0;
+
+  // The current position taking the filter offset into account
+  int64_t position_with_offset = filter.offset;
+  if (filter.GetNullCount() > 0) {

Review comment:
   Someone can do it as follow up (and keeping an eye on the benchmarks to 
avoid perf regressions), it doesn't feel like a good use of my time when there 
are so many things to do for 1.0.0. 





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441656051



##
File path: cpp/src/arrow/compute/kernels/util_internal.h
##
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include 
+
+#include "arrow/buffer.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+// An internal data structure for unpacking a primitive argument to pass to a
+// kernel implementation
+struct PrimitiveArg {
+  const uint8_t* is_valid;
+  const uint8_t* data;

Review comment:
   done





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441658427



##
File path: cpp/src/arrow/testing/random.cc
##
@@ -84,7 +84,7 @@ std::shared_ptr RandomArrayGenerator::Boolean(int64_t 
size, double probab
 
   BufferVector buffers{2};
   // Need 2 distinct generators such that probabilities are not shared.
-  GenOpt value_gen(seed(), 0, 1, probability);
+  GenOpt value_gen(seed(), 0, 1, 1 - probability);

Review comment:
   Done, and renamed the parameters to be clear that it's the "true" 
probaility





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441659681



##
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##
@@ -0,0 +1,1816 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/extension_type.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+
+using internal::BinaryBitBlockCounter;
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::BitmapReader;
+using internal::CopyBitmap;
+using internal::GetArrayView;
+using internal::IndexBoundsCheck;
+using internal::OptionalBitBlockCounter;
+using internal::OptionalBitIndexer;
+
+namespace compute {
+namespace internal {
+
+int64_t GetFilterOutputSize(const ArrayData& filter,
+FilterOptions::NullSelectionBehavior 
null_selection) {
+  int64_t output_size = 0;
+  int64_t position = 0;
+  if (filter.GetNullCount() > 0) {
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+BinaryBitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset,
+  filter_is_valid, filter.offset, 
filter.length);
+if (null_selection == FilterOptions::EMIT_NULL) {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextOrNotWord();
+output_size += block.popcount;
+position += block.length;
+  }
+} else {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextAndWord();
+output_size += block.popcount;
+position += block.length;
+  }
+}
+  } else {
+// The filter has no nulls, so we plow through its data as fast as
+// possible.
+BitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset, 
filter.length);

Review comment:
   Changing to use CountSetBits, which should be faster





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441660523



##
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##
@@ -0,0 +1,1816 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/extension_type.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+
+using internal::BinaryBitBlockCounter;
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::BitmapReader;
+using internal::CopyBitmap;
+using internal::GetArrayView;
+using internal::IndexBoundsCheck;
+using internal::OptionalBitBlockCounter;
+using internal::OptionalBitIndexer;
+
+namespace compute {
+namespace internal {
+
+int64_t GetFilterOutputSize(const ArrayData& filter,
+FilterOptions::NullSelectionBehavior 
null_selection) {
+  int64_t output_size = 0;
+  int64_t position = 0;
+  if (filter.GetNullCount() > 0) {
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+BinaryBitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset,

Review comment:
   Maybe. Let's leave this for follow up because I would need some 
benchmarks to be comfortable doing this





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441660992



##
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##
@@ -0,0 +1,1816 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/extension_type.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+
+using internal::BinaryBitBlockCounter;
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::BitmapReader;
+using internal::CopyBitmap;
+using internal::GetArrayView;
+using internal::IndexBoundsCheck;
+using internal::OptionalBitBlockCounter;
+using internal::OptionalBitIndexer;
+
+namespace compute {
+namespace internal {
+
+int64_t GetFilterOutputSize(const ArrayData& filter,
+FilterOptions::NullSelectionBehavior 
null_selection) {
+  int64_t output_size = 0;
+  int64_t position = 0;
+  if (filter.GetNullCount() > 0) {
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+BinaryBitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset,
+  filter_is_valid, filter.offset, 
filter.length);
+if (null_selection == FilterOptions::EMIT_NULL) {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextOrNotWord();
+output_size += block.popcount;
+position += block.length;
+  }
+} else {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextAndWord();
+output_size += block.popcount;
+position += block.length;
+  }
+}
+  } else {
+// The filter has no nulls, so we plow through its data as fast as
+// possible.
+BitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset, 
filter.length);
+while (position < filter.length) {
+  BitBlockCount block = bit_counter.NextFourWords();
+  output_size += block.popcount;
+  position += block.length;
+}
+  }
+  return output_size;
+}
+
+template 
+Result> GetTakeIndicesImpl(
+const ArrayData& filter, FilterOptions::NullSelectionBehavior 
null_selection,
+MemoryPool* memory_pool) {
+  using T = typename IndexType::c_type;
+  typename TypeTraits::BuilderType builder(memory_pool);
+
+  const uint8_t* filter_data = filter.buffers[1]->data();
+  BitBlockCounter data_counter(filter_data, filter.offset, filter.length);
+
+  // The position relative to the start of the filter
+  T position = 0;
+
+  // The current position taking the filter offset into account
+  int64_t position_with_offset = filter.offset;
+  if (filter.GetNullCount() > 0) {
+// The filter has nulls, so we scan the validity bitmap and the filter data
+// bitmap together, branching on the null selection type.
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+
+// To count blocks whether filter_data[i] || !filter_is_valid[i]
+BinaryBitBlockCounter filter_counter(filter_data, filter.offset, 
filter_is_valid,
+ filter.offset, filter.length);
+if (null_selection == FilterOptions::DROP) {
+  while (position < filter.length) {
+BitBlockCount and_block = filter_counter.NextAndWord();
+RETURN_NOT_OK(builder.Reserve(and_block.popcount));
+if (and_block.AllSet()) {
+  // All the values are selected and non-null
+  for (int64_t i = 0; i < and_block.length; ++i) {
+builder.UnsafeAppend(position++);
+  }
+  position_with_offset += and_block.length;
+ 

[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441661392



##
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##
@@ -0,0 +1,1816 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/extension_type.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+
+using internal::BinaryBitBlockCounter;
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::BitmapReader;
+using internal::CopyBitmap;
+using internal::GetArrayView;
+using internal::IndexBoundsCheck;
+using internal::OptionalBitBlockCounter;
+using internal::OptionalBitIndexer;
+
+namespace compute {
+namespace internal {
+
+int64_t GetFilterOutputSize(const ArrayData& filter,
+FilterOptions::NullSelectionBehavior 
null_selection) {
+  int64_t output_size = 0;
+  int64_t position = 0;
+  if (filter.GetNullCount() > 0) {
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+BinaryBitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset,
+  filter_is_valid, filter.offset, 
filter.length);
+if (null_selection == FilterOptions::EMIT_NULL) {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextOrNotWord();
+output_size += block.popcount;
+position += block.length;
+  }
+} else {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextAndWord();
+output_size += block.popcount;
+position += block.length;
+  }
+}
+  } else {
+// The filter has no nulls, so we plow through its data as fast as
+// possible.
+BitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset, 
filter.length);
+while (position < filter.length) {
+  BitBlockCount block = bit_counter.NextFourWords();
+  output_size += block.popcount;
+  position += block.length;
+}
+  }
+  return output_size;
+}
+
+template 
+Result> GetTakeIndicesImpl(
+const ArrayData& filter, FilterOptions::NullSelectionBehavior 
null_selection,
+MemoryPool* memory_pool) {
+  using T = typename IndexType::c_type;
+  typename TypeTraits::BuilderType builder(memory_pool);
+
+  const uint8_t* filter_data = filter.buffers[1]->data();
+  BitBlockCounter data_counter(filter_data, filter.offset, filter.length);
+
+  // The position relative to the start of the filter
+  T position = 0;
+
+  // The current position taking the filter offset into account
+  int64_t position_with_offset = filter.offset;
+  if (filter.GetNullCount() > 0) {
+// The filter has nulls, so we scan the validity bitmap and the filter data
+// bitmap together, branching on the null selection type.
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+
+// To count blocks whether filter_data[i] || !filter_is_valid[i]
+BinaryBitBlockCounter filter_counter(filter_data, filter.offset, 
filter_is_valid,
+ filter.offset, filter.length);
+if (null_selection == FilterOptions::DROP) {
+  while (position < filter.length) {
+BitBlockCount and_block = filter_counter.NextAndWord();
+RETURN_NOT_OK(builder.Reserve(and_block.popcount));
+if (and_block.AllSet()) {
+  // All the values are selected and non-null
+  for (int64_t i = 0; i < and_block.length; ++i) {
+builder.UnsafeAppend(position++);
+  }
+  position_with_offset += and_block.length;
+ 

[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441662165



##
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##
@@ -0,0 +1,1816 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/extension_type.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+
+using internal::BinaryBitBlockCounter;
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::BitmapReader;
+using internal::CopyBitmap;
+using internal::GetArrayView;
+using internal::IndexBoundsCheck;
+using internal::OptionalBitBlockCounter;
+using internal::OptionalBitIndexer;
+
+namespace compute {
+namespace internal {
+
+int64_t GetFilterOutputSize(const ArrayData& filter,
+FilterOptions::NullSelectionBehavior 
null_selection) {
+  int64_t output_size = 0;
+  int64_t position = 0;
+  if (filter.GetNullCount() > 0) {
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+BinaryBitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset,
+  filter_is_valid, filter.offset, 
filter.length);
+if (null_selection == FilterOptions::EMIT_NULL) {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextOrNotWord();
+output_size += block.popcount;
+position += block.length;
+  }
+} else {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextAndWord();
+output_size += block.popcount;
+position += block.length;
+  }
+}
+  } else {
+// The filter has no nulls, so we plow through its data as fast as
+// possible.
+BitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset, 
filter.length);
+while (position < filter.length) {
+  BitBlockCount block = bit_counter.NextFourWords();
+  output_size += block.popcount;
+  position += block.length;
+}
+  }
+  return output_size;
+}
+
+template 
+Result> GetTakeIndicesImpl(
+const ArrayData& filter, FilterOptions::NullSelectionBehavior 
null_selection,
+MemoryPool* memory_pool) {
+  using T = typename IndexType::c_type;
+  typename TypeTraits::BuilderType builder(memory_pool);
+
+  const uint8_t* filter_data = filter.buffers[1]->data();
+  BitBlockCounter data_counter(filter_data, filter.offset, filter.length);
+
+  // The position relative to the start of the filter
+  T position = 0;
+
+  // The current position taking the filter offset into account
+  int64_t position_with_offset = filter.offset;
+  if (filter.GetNullCount() > 0) {
+// The filter has nulls, so we scan the validity bitmap and the filter data
+// bitmap together, branching on the null selection type.
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+
+// To count blocks whether filter_data[i] || !filter_is_valid[i]
+BinaryBitBlockCounter filter_counter(filter_data, filter.offset, 
filter_is_valid,
+ filter.offset, filter.length);
+if (null_selection == FilterOptions::DROP) {
+  while (position < filter.length) {
+BitBlockCount and_block = filter_counter.NextAndWord();
+RETURN_NOT_OK(builder.Reserve(and_block.popcount));
+if (and_block.AllSet()) {
+  // All the values are selected and non-null
+  for (int64_t i = 0; i < and_block.length; ++i) {
+builder.UnsafeAppend(position++);
+  }
+  position_with_offset += and_block.length;
+ 

[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441668163



##
File path: cpp/src/arrow/compute/api_vector.h
##
@@ -64,6 +67,24 @@ Result Filter(const Datum& values, const Datum& 
filter,
  const FilterOptions& options = FilterOptions::Defaults(),
  ExecContext* ctx = NULLPTR);
 
+namespace internal {
+
+// These internal functions are implemented in kernels/vector_selection.cc
+
+/// \brief Return the number of selected indices in the boolean filter
+ARROW_EXPORT
+int64_t GetFilterOutputSize(const ArrayData& filter,

Review comment:
   OK





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] wesm commented on a change in pull request #7442: ARROW-9075: [C++] Optimized Filter implementation: faster performance + compilation, smaller code size

2020-06-17 Thread GitBox


wesm commented on a change in pull request #7442:
URL: https://github.com/apache/arrow/pull/7442#discussion_r441674808



##
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##
@@ -0,0 +1,1816 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include 
+#include 
+#include 
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_dict.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/extension_type.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+
+using internal::BinaryBitBlockCounter;
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::BitmapReader;
+using internal::CopyBitmap;
+using internal::GetArrayView;
+using internal::IndexBoundsCheck;
+using internal::OptionalBitBlockCounter;
+using internal::OptionalBitIndexer;
+
+namespace compute {
+namespace internal {
+
+int64_t GetFilterOutputSize(const ArrayData& filter,
+FilterOptions::NullSelectionBehavior 
null_selection) {
+  int64_t output_size = 0;
+  int64_t position = 0;
+  if (filter.GetNullCount() > 0) {
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+BinaryBitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset,
+  filter_is_valid, filter.offset, 
filter.length);
+if (null_selection == FilterOptions::EMIT_NULL) {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextOrNotWord();
+output_size += block.popcount;
+position += block.length;
+  }
+} else {
+  while (position < filter.length) {
+BitBlockCount block = bit_counter.NextAndWord();
+output_size += block.popcount;
+position += block.length;
+  }
+}
+  } else {
+// The filter has no nulls, so we plow through its data as fast as
+// possible.
+BitBlockCounter bit_counter(filter.buffers[1]->data(), filter.offset, 
filter.length);
+while (position < filter.length) {
+  BitBlockCount block = bit_counter.NextFourWords();
+  output_size += block.popcount;
+  position += block.length;
+}
+  }
+  return output_size;
+}
+
+template 
+Result> GetTakeIndicesImpl(
+const ArrayData& filter, FilterOptions::NullSelectionBehavior 
null_selection,
+MemoryPool* memory_pool) {
+  using T = typename IndexType::c_type;
+  typename TypeTraits::BuilderType builder(memory_pool);
+
+  const uint8_t* filter_data = filter.buffers[1]->data();
+  BitBlockCounter data_counter(filter_data, filter.offset, filter.length);
+
+  // The position relative to the start of the filter
+  T position = 0;
+
+  // The current position taking the filter offset into account
+  int64_t position_with_offset = filter.offset;
+  if (filter.GetNullCount() > 0) {
+// The filter has nulls, so we scan the validity bitmap and the filter data
+// bitmap together, branching on the null selection type.
+const uint8_t* filter_is_valid = filter.buffers[0]->data();
+
+// To count blocks whether filter_data[i] || !filter_is_valid[i]
+BinaryBitBlockCounter filter_counter(filter_data, filter.offset, 
filter_is_valid,
+ filter.offset, filter.length);
+if (null_selection == FilterOptions::DROP) {
+  while (position < filter.length) {
+BitBlockCount and_block = filter_counter.NextAndWord();
+RETURN_NOT_OK(builder.Reserve(and_block.popcount));
+if (and_block.AllSet()) {
+  // All the values are selected and non-null
+  for (int64_t i = 0; i < and_block.length; ++i) {
+builder.UnsafeAppend(position++);
+  }
+  position_with_offset += and_block.length;
+