This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2df33de  ARROW-4571: [Format] Tensor.fbs file has multiple root_type declarations
2df33de is described below

commit 2df33dec5e8abc07aa6300bbd2ee634c57d8fcfe
Author: Kenta Murata <[email protected]>
AuthorDate: Mon Feb 18 10:20:33 2019 -0600

    ARROW-4571: [Format] Tensor.fbs file has multiple root_type declarations
    
    Author: Kenta Murata <[email protected]>
    
    Closes #3651 from mrkn/separate_sparse_tensor_format and squashes the following commits:
    
    760cefaa9 <Kenta Murata> Add format/SparseTensor.fbs
    1f92cfa5e <Kenta Murata> Separate SaprseTensor.fbs from Tensor.fbs
---
 cpp/src/arrow/ipc/CMakeLists.txt        |  1 +
 cpp/src/arrow/ipc/metadata-internal.cc  |  3 +-
 docs/source/format/README.rst           |  2 +-
 format/Message.fbs                      |  1 +
 format/{Tensor.fbs => SparseTensor.fbs} | 36 ++-----------
 format/Tensor.fbs                       | 93 ---------------------------------
 java/format/pom.xml                     |  1 +
 7 files changed, 9 insertions(+), 128 deletions(-)

diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index fccd53c..352b2de 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -58,6 +58,7 @@ set(FBS_SRC
     ${ARROW_SOURCE_DIR}/../format/File.fbs
     ${ARROW_SOURCE_DIR}/../format/Schema.fbs
     ${ARROW_SOURCE_DIR}/../format/Tensor.fbs
+    ${ARROW_SOURCE_DIR}/../format/SparseTensor.fbs
     ${CMAKE_CURRENT_SOURCE_DIR}/feather.fbs)
 
 foreach(FIL ${FBS_SRC})
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index da67113..38d8eaa 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -28,7 +28,8 @@
 #include "arrow/io/interfaces.h"
 #include "arrow/ipc/File_generated.h"  // IWYU pragma: keep
 #include "arrow/ipc/Message_generated.h"
-#include "arrow/ipc/Tensor_generated.h"  // IWYU pragma: keep
+#include "arrow/ipc/SparseTensor_generated.h"  // IWYU pragma: keep
+#include "arrow/ipc/Tensor_generated.h"        // IWYU pragma: keep
 #include "arrow/ipc/message.h"
 #include "arrow/ipc/util.h"
 #include "arrow/sparse_tensor.h"
diff --git a/docs/source/format/README.rst b/docs/source/format/README.rst
index f2f770b..4044026 100644
--- a/docs/source/format/README.rst
+++ b/docs/source/format/README.rst
@@ -25,7 +25,7 @@ Currently, the Arrow specification consists of these pieces:
 - Logical Types, Schemas, and Record Batch Metadata (see Schema.fbs)
 - Encapsulated Messages (see Message.fbs)
 - Mechanics of messaging between Arrow systems (IPC, RPC, etc.) (see :doc:`IPC`)
-- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs)
+- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs and SparseTensor.fbs)
 
 The metadata currently uses Google's `flatbuffers library`_ for serializing a
 couple related pieces of information:
diff --git a/format/Message.fbs b/format/Message.fbs
index e14fdca..10adaaa 100644
--- a/format/Message.fbs
+++ b/format/Message.fbs
@@ -16,6 +16,7 @@
 // under the License.
 
 include "Schema.fbs";
+include "SparseTensor.fbs";
 include "Tensor.fbs";
 
 namespace org.apache.arrow.flatbuf;
diff --git a/format/Tensor.fbs b/format/SparseTensor.fbs
similarity index 80%
copy from format/Tensor.fbs
copy to format/SparseTensor.fbs
index e77b353..0a0c6c2 100644
--- a/format/Tensor.fbs
+++ b/format/SparseTensor.fbs
@@ -15,44 +15,14 @@
 // specific language governing permissions and limitations
 // under the License.
 
-/// EXPERIMENTAL: Metadata for n-dimensional arrays, aka "tensors" or
-/// "ndarrays". Arrow implementations in general are not required to implement
-/// this type
+/// EXPERIMENTAL: Metadata for n-dimensional sparse arrays, aka "sparse tensors".
+/// Arrow implementations in general are not required to implement this type
 
-include "Schema.fbs";
+include "Tensor.fbs";
 
 namespace org.apache.arrow.flatbuf;
 
 /// ----------------------------------------------------------------------
-/// Data structures for dense tensors
-
-/// Shape data for a single axis in a tensor
-table TensorDim {
-  /// Length of dimension
-  size: long;
-
-  /// Name of the dimension, optional
-  name: string;
-}
-
-table Tensor {
-  /// The type of data contained in a value cell. Currently only fixed-width
-  /// value types are supported, no strings or nested types
-  type: Type;
-
-  /// The dimensions of the tensor, optionally named
-  shape: [TensorDim];
-
-  /// Non-negative byte offsets to advance one value cell along each dimension
-  strides: [long];
-
-  /// The location and size of the tensor's data
-  data: Buffer;
-}
-
-root_type Tensor;
-
-/// ----------------------------------------------------------------------
 /// EXPERIMENTAL: Data structures for sparse tensors
 
 /// Coodinate format of sparse tensor index.
diff --git a/format/Tensor.fbs b/format/Tensor.fbs
index e77b353..01a20c3 100644
--- a/format/Tensor.fbs
+++ b/format/Tensor.fbs
@@ -51,96 +51,3 @@ table Tensor {
 }
 
 root_type Tensor;
-
-/// ----------------------------------------------------------------------
-/// EXPERIMENTAL: Data structures for sparse tensors
-
-/// Coodinate format of sparse tensor index.
-table SparseTensorIndexCOO {
-  /// COO's index list are represented as a NxM matrix,
-  /// where N is the number of non-zero values,
-  /// and M is the number of dimensions of a sparse tensor.
-  /// indicesBuffer stores the location and size of this index matrix.
-  /// The type of index value is long, so the stride for the index matrix is unnecessary.
-  ///
-  /// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
-  ///
-  ///   X[0, 1, 2, 0] := 1
-  ///   X[1, 1, 2, 3] := 2
-  ///   X[0, 2, 1, 0] := 3
-  ///   X[0, 1, 3, 0] := 4
-  ///   X[0, 1, 2, 1] := 5
-  ///   X[1, 2, 0, 4] := 6
-  ///
-  /// In COO format, the index matrix of X is the following 4x6 matrix:
-  ///
-  ///   [[0, 0, 0, 0, 1, 1],
-  ///    [1, 1, 1, 2, 1, 2],
-  ///    [2, 2, 3, 1, 2, 0],
-  ///    [0, 1, 0, 0, 3, 4]]
-  ///
-  /// Note that the indices are sorted in lexcographical order.
-  indicesBuffer: Buffer;
-}
-
-/// Compressed Sparse Row format, that is matrix-specific.
-table SparseMatrixIndexCSR {
-  /// indptrBuffer stores the location and size of indptr array that
-  /// represents the range of the rows.
-  /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
-  /// The length of this array is 1 + (the number of rows), and the type
-  /// of index value is long.
-  ///
-  /// For example, let X be the following 6x4 matrix:
-  ///
-  ///   X := [[0, 1, 2, 0],
-  ///         [0, 0, 3, 0],
-  ///         [0, 4, 0, 5],
-  ///         [0, 0, 0, 0],
-  ///         [6, 0, 7, 8],
-  ///         [0, 9, 0, 0]].
-  ///
-  /// The array of non-zero values in X is:
-  ///
-  ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-  ///
-  /// And the indptr of X is:
-  ///
-  ///   indptr(X) = [0, 2, 3, 5, 5, 8, 10].
-  indptrBuffer: Buffer;
-
-  /// indicesBuffer stores the location and size of the array that
-  /// contains the column indices of the corresponding non-zero values.
-  /// The type of index value is long.
-  ///
-  /// For example, the indices of the above X is:
-  ///
-  ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
-  indicesBuffer: Buffer;
-}
-
-union SparseTensorIndex {
-  SparseTensorIndexCOO,
-  SparseMatrixIndexCSR
-}
-
-table SparseTensor {
-  /// The type of data contained in a value cell.
-  /// Currently only fixed-width value types are supported,
-  /// no strings or nested types.
-  type: Type;
-
-  /// The dimensions of the tensor, optionally named.
-  shape: [TensorDim];
-
-  /// The number of non-zero values in a sparse tensor.
-  non_zero_length: long;
-
-  /// Sparse tensor index
-  sparseIndex: SparseTensorIndex;
-
-  /// The location and size of the tensor's data
-  data: Buffer;
-}
-
-root_type SparseTensor;
diff --git a/java/format/pom.xml b/java/format/pom.xml
index 2c3dc03..5525cd3 100644
--- a/java/format/pom.xml
+++ b/java/format/pom.xml
@@ -106,6 +106,7 @@
               <argument>${flatc.generated.files}</argument>
               <argument>../../format/Schema.fbs</argument>
               <argument>../../format/Tensor.fbs</argument>
+              <argument>../../format/SparseTensor.fbs</argument>
               <argument>../../format/File.fbs</argument>
               <argument>../../format/Message.fbs</argument>
             </arguments>

Reply via email to