This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new f83a282 ARROW-4335: [C++] Better document sparse tensor support f83a282 is described below commit f83a2822db510d589c604904de264104eef04fd6 Author: Kenta Murata <m...@mrkn.jp> AuthorDate: Tue Mar 12 20:37:54 2019 -0500 ARROW-4335: [C++] Better document sparse tensor support I wrote descriptions for sparse tensor classes. Author: Kenta Murata <m...@mrkn.jp> Closes #3810 from mrkn/sparse_tensor_doc and squashes the following commits: 9c4fe1422 <Kenta Murata> Fix incorrect English 522a982f1 <Kenta Murata> Modify comments following review comments 83cab0377 <Kenta Murata> Put SparseIndexBase<> class in internal namespace 6cd13e2eb <Kenta Murata> Write descriptions of sparse tensor --- cpp/src/arrow/sparse_tensor.h | 69 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index ded3a6d..e622245 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -29,13 +29,18 @@ namespace arrow { // ---------------------------------------------------------------------- // SparseIndex class -/// \brief EXPERIMENTAL: Sparse tensor format enumeration struct SparseTensorFormat { + /// EXPERIMENTAL: The index format type of SparseTensor enum type { COO, CSR }; }; -/// \brief EXPERIMENTAL: The base class for representing index of non-zero -/// values in sparse tensor +/// \brief EXPERIMENTAL: The base class for the index of a sparse tensor +/// +/// SparseIndex describes where the non-zero elements are within a SparseTensor. +/// +/// There are several ways to represent this. The format_id is used to +/// distinguish what kind of representation is used. Each possible value of +/// format_id must have only one corresponding concrete subclass of SparseIndex. class ARROW_EXPORT SparseIndex { public: explicit SparseIndex(SparseTensorFormat::type format_id, int64_t non_zero_length) @@ -43,9 +48,14 @@ class ARROW_EXPORT SparseIndex { virtual ~SparseIndex() = default; + /// \brief Return the identifier of the format type SparseTensorFormat::type format_id() const { return format_id_; } + + /// \brief Return the number of non zero values in the sparse tensor related + /// to this sparse index int64_t non_zero_length() const { return non_zero_length_; } + /// \brief Return the string representation of the sparse index virtual std::string ToString() const = 0; protected: @@ -53,18 +63,23 @@ class ARROW_EXPORT SparseIndex { int64_t non_zero_length_; }; +namespace internal { template <typename SparseIndexType> class SparseIndexBase : public SparseIndex { public: explicit SparseIndexBase(int64_t non_zero_length) : SparseIndex(SparseIndexType::format_id, non_zero_length) {} }; +} // namespace internal // ---------------------------------------------------------------------- // SparseCOOIndex class -/// \brief EXPERIMENTAL: The index data for COO sparse tensor -class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> { +/// \brief EXPERIMENTAL: The index data for a COO sparse tensor +/// +/// A COO sparse index manages the location of its non-zero values by their +/// coordinates. +class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBase<SparseCOOIndex> { public: using CoordsTensor = NumericTensor<Int64Type>; @@ -73,10 +88,13 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> { // Constructor with a column-major NumericTensor explicit SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords); + /// \brief Return a tensor that has the coordinates of the non-zero values const std::shared_ptr<CoordsTensor>& indices() const { return coords_; } + /// \brief Return a string representation of the sparse index std::string ToString() const override; + /// \brief Return whether the COO indices are equal bool Equals(const SparseCOOIndex& other) const { return indices()->Equals(*other.indices()); } @@ -88,8 +106,19 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> { // ---------------------------------------------------------------------- // SparseCSRIndex class -/// \brief EXPERIMENTAL: The index data for CSR sparse matrix -class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> { +/// \brief EXPERIMENTAL: The index data for a CSR sparse matrix +/// +/// A CSR sparse index manages the location of its non-zero values by two +/// vectors. +/// +/// The first vector, called indptr, represents the range of the rows; the i-th +/// row spans from indptr[i] to indptr[i+1] in the corresponding value vector. +/// So the length of an indptr vector is the number of rows + 1. +/// +/// The other vector, called indices, represents the column indices of the +/// corresponding non-zero values. So the length of an indices vector is same +/// as the number of non-zero-values. +class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBase<SparseCSRIndex> { public: using IndexTensor = NumericTensor<Int64Type>; @@ -99,11 +128,16 @@ class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> { explicit SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr, const std::shared_ptr<IndexTensor>& indices); + /// \brief Return a 1D tensor of indptr vector const std::shared_ptr<IndexTensor>& indptr() const { return indptr_; } + + /// \brief Return a 1D tensor of indices vector const std::shared_ptr<IndexTensor>& indices() const { return indices_; } + /// \brief Return a string representation of the sparse index std::string ToString() const override; + /// \brief Return whether the CSR indices are equal bool Equals(const SparseCSRIndex& other) const { return indptr()->Equals(*other.indptr()) && indices()->Equals(*other.indices()); } @@ -123,32 +157,45 @@ class ARROW_EXPORT SparseTensor { SparseTensorFormat::type format_id() const { return sparse_index_->format_id(); } + /// \brief Return a value type of the sparse tensor std::shared_ptr<DataType> type() const { return type_; } + + /// \brief Return a buffer that contains the value vector of the sparse tensor std::shared_ptr<Buffer> data() const { return data_; } + /// \brief Return an immutable raw data pointer const uint8_t* raw_data() const { return data_->data(); } + + /// \brief Return a mutable raw data pointer uint8_t* raw_mutable_data() const { return data_->mutable_data(); } + /// \brief Return a shape vector of the sparse tensor const std::vector<int64_t>& shape() const { return shape_; } + /// \brief Return a sparse index of the sparse tensor const std::shared_ptr<SparseIndex>& sparse_index() const { return sparse_index_; } + /// \brief Return a number of dimensions of the sparse tensor int ndim() const { return static_cast<int>(shape_.size()); } + /// \brief Return a vector of dimension names const std::vector<std::string>& dim_names() const { return dim_names_; } + + /// \brief Return the name of the i-th dimension const std::string& dim_name(int i) const; - /// Total number of value cells in the sparse tensor + /// \brief Total number of value cells in the sparse tensor int64_t size() const; - /// Return true if the underlying data buffer is mutable + /// \brief Return true if the underlying data buffer is mutable bool is_mutable() const { return data_->is_mutable(); } - /// Total number of non-zero cells in the sparse tensor + /// \brief Total number of non-zero cells in the sparse tensor int64_t non_zero_length() const { return sparse_index_ ? sparse_index_->non_zero_length() : 0; } + /// \brief Return whether sparse tensors are equal bool Equals(const SparseTensor& other) const; protected: @@ -163,7 +210,7 @@ class ARROW_EXPORT SparseTensor { std::vector<int64_t> shape_; std::shared_ptr<SparseIndex> sparse_index_; - /// These names are optional + // These names are optional std::vector<std::string> dim_names_; };