rok commented on code in PR #47586:
URL: https://github.com/apache/arrow/pull/47586#discussion_r2378745142
##########
cpp/src/arrow/tensor/csx_converter.cc:
##########
@@ -70,47 +74,34 @@ class SparseCSXMatrixConverter : private
SparseTensorConverterMixin {
ARROW_ASSIGN_OR_RAISE(auto values_buffer,
AllocateBuffer(value_elsize * nonzero_count, pool_));
- auto* values = values_buffer->mutable_data();
-
- const auto* tensor_data = tensor_.raw_data();
-
- if (ndim <= 1) {
- return Status::NotImplemented("TODO for ndim <= 1");
- } else {
- ARROW_ASSIGN_OR_RAISE(indptr_buffer,
- AllocateBuffer(index_elsize * (n_major + 1),
pool_));
- auto* indptr = indptr_buffer->mutable_data();
-
- ARROW_ASSIGN_OR_RAISE(indices_buffer,
- AllocateBuffer(index_elsize * nonzero_count,
pool_));
- auto* indices = indices_buffer->mutable_data();
-
- std::vector<int64_t> coords(2);
- int64_t k = 0;
- std::fill_n(indptr, index_elsize, 0);
- indptr += index_elsize;
- for (int64_t i = 0; i < n_major; ++i) {
- for (int64_t j = 0; j < n_minor; ++j) {
- if (axis_ == SparseMatrixCompressedAxis::ROW) {
- coords = {i, j};
- } else {
- coords = {j, i};
- }
- const int64_t offset = tensor_.CalculateValueOffset(coords);
- if (std::any_of(tensor_data + offset, tensor_data + offset +
value_elsize,
- IsNonZero)) {
- std::copy_n(tensor_data + offset, value_elsize, values);
- values += value_elsize;
-
- AssignIndex(indices, j, index_elsize);
- indices += index_elsize;
-
- k++;
- }
+ ARROW_ASSIGN_OR_RAISE(indptr_buffer,
+ AllocateBuffer(index_elsize * (n_major + 1), pool_));
+ ARROW_ASSIGN_OR_RAISE(indices_buffer,
+ AllocateBuffer(index_elsize * nonzero_count, pool_));
+
+ auto* indptr = indptr_buffer->mutable_data_as<IndexCType>();
+ auto* values = values_buffer->mutable_data_as<ValueCType>();
+ auto* indices = indices_buffer->mutable_data_as<IndexCType>();
Review Comment:
Should we `std::fill_n` these with 0, as we previously did for `indptr`? I'm
not sure what the reasoning behind that was.
##########
cpp/src/arrow/tensor.cc:
##########
@@ -485,13 +486,14 @@ namespace {
template <typename TYPE>
int64_t StridedTensorCountNonZero(int dim_index, int64_t offset, const Tensor&
tensor) {
using c_type = typename TYPE::c_type;
- const c_type zero = c_type(0);
int64_t nnz = 0;
if (dim_index == tensor.ndim() - 1) {
for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {
const auto* ptr = tensor.raw_data() + offset + i *
tensor.strides()[dim_index];
- auto& elem = *reinterpret_cast<const c_type*>(ptr);
- if (elem != zero) ++nnz;
+ if (auto& elem = *reinterpret_cast<const c_type*>(ptr);
+ internal::is_not_zero<TYPE>(elem)) {
Review Comment:
Maybe:
```suggestion
auto& elem = *reinterpret_cast<const c_type*>(ptr);
if (internal::is_not_zero<TYPE>(elem)) {
```
##########
cpp/src/arrow/sparse_tensor_test.cc:
##########
@@ -413,6 +413,99 @@ TEST_F(TestSparseCOOTensor, TestToTensor) {
ASSERT_TRUE(tensor.Equals(*dense_tensor));
}
+template <typename ValueType>
+class TestSparseCOOTensorCreationFromNegativeZero
+ : public TestSparseTensorBase<ValueType> {
+ public:
+ using ValueCType = typename ValueType::c_type;
+
+ void SetUp() override { type_ = TypeTraits<ValueType>::type_singleton(); }
+
+ void FromVector() {
+ std::vector<ValueCType> data{
+ -0.0, -0.0, 0.0, -0.0, 4.0, -0.0, -0.0, 0.0, -0.0, -1.0, -0.0, -0.0,
Review Comment:
Should we add `+0.0` to this vector, and to the tests below as well?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]