This is an automated email from the ASF dual-hosted git repository.
sgilmore pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new c4e201329e GH-38214: [MATLAB] Add a common `arrow.tabular.Tabular`
MATLAB interface (#47014)
c4e201329e is described below
commit c4e201329e7ba78f8ca4a4d63e8c26ebc061b12e
Author: Sarah Gilmore <[email protected]>
AuthorDate: Mon Jul 7 14:22:09 2025 -0400
GH-38214: [MATLAB] Add a common `arrow.tabular.Tabular` MATLAB interface
(#47014)
### Rationale for this change
Currently, the properties and methods of `arrow.tabular.RecordBatch` and
`arrow.tabular.Table` are very similar. To simplify implementation of these
classes, reduce code duplication, and ensure design consistency, it might be
helpful to factor out the common tabular functionality into an
`arrow.tabular.Tabular` interface that both classes implement.
### What changes are included in this PR?
1. Defined an `arrow.tabular.Tabular` interface that contains the tabular
functionality shared by `arrow.tabular.Table` and `arrow.tabular.RecordBatch`.
2. Updated `arrow.tabular.Table` and `arrow.tabular.RecordBatch` to
implement the `arrow.tabular.Tabular` interface.
### Are these changes tested?
Yes. These changes are covered by the existing test cases defined in
`tTable.m` and `tRecordBatch.m`.
### Are there any user-facing changes?
No.
* GitHub Issue: #38214
Lead-authored-by: Sarah Gilmore <[email protected]>
Co-authored-by: Sarah Gilmore <[email protected]>
Co-authored-by: Kevin Gurney <[email protected]>
Signed-off-by: Sarah Gilmore <[email protected]>
---
matlab/src/matlab/+arrow/+tabular/RecordBatch.m | 99 +++-------------------
matlab/src/matlab/+arrow/+tabular/Table.m | 97 ++-------------------
.../+arrow/+tabular/{RecordBatch.m => Tabular.m} | 92 ++++++--------------
3 files changed, 45 insertions(+), 243 deletions(-)
diff --git a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
index 5c66c66c26..6210a48f90 100644
--- a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
+++ b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
@@ -1,7 +1,6 @@
%RECORDBATCH A tabular data structure representing a set of
%arrow.array.Array objects with a fixed schema.
-
% Licensed to the Apache Software Foundation (ASF) under one or more
% contributor license agreements. See the NOTICE file distributed with
% this work for additional information regarding copyright ownership.
@@ -17,90 +16,16 @@
% implied. See the License for the specific language governing
% permissions and limitations under the License.
-classdef RecordBatch < matlab.mixin.CustomDisplay & ...
- matlab.mixin.Scalar
-
- properties (Dependent, SetAccess=private, GetAccess=public)
- NumRows
- NumColumns
- ColumnNames
- Schema
- end
-
- properties (Hidden, SetAccess=private, GetAccess=public)
- Proxy
- end
+classdef RecordBatch < arrow.tabular.Tabular
methods
+
function obj = RecordBatch(proxy)
arguments
proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy,
"arrow.tabular.proxy.RecordBatch")}
end
import arrow.internal.proxy.validate
- obj.Proxy = proxy;
- end
-
- function numRows = get.NumRows(obj)
- numRows = obj.Proxy.getNumRows();
- end
-
- function numColumns = get.NumColumns(obj)
- numColumns = obj.Proxy.getNumColumns();
- end
-
- function columnNames = get.ColumnNames(obj)
- columnNames = obj.Proxy.getColumnNames();
- end
-
- function schema = get.Schema(obj)
- proxyID = obj.Proxy.getSchema();
- proxy = libmexclass.proxy.Proxy(Name="arrow.tabular.proxy.Schema",
ID=proxyID);
- schema = arrow.tabular.Schema(proxy);
- end
-
- function arrowArray = column(obj, idx)
- import arrow.internal.validate.*
-
- idx = index.numericOrString(idx, "int32", AllowNonScalar=false);
-
- if isnumeric(idx)
- args = struct(Index=idx);
- proxyInfo = obj.Proxy.getColumnByIndex(args);
- else
- args = struct(Name=idx);
- proxyInfo = obj.Proxy.getColumnByName(args);
- end
-
- traits = arrow.type.traits.traits(arrow.type.ID(proxyInfo.TypeID));
- proxy = libmexclass.proxy.Proxy(Name=traits.ArrayProxyClassName,
ID=proxyInfo.ProxyID);
- arrowArray = traits.ArrayConstructor(proxy);
- end
-
- function T = table(obj)
- import arrow.tabular.internal.*
-
- numColumns = obj.NumColumns;
- matlabArrays = cell(1, numColumns);
-
- for ii = 1:numColumns
- arrowArray = obj.column(ii);
- matlabArrays{ii} = toMATLAB(arrowArray);
- end
-
- validVariableNames = makeValidVariableNames(obj.ColumnNames);
- validDimensionNames = makeValidDimensionNames(validVariableNames);
-
- T = table(matlabArrays{:}, ...
- VariableNames=validVariableNames, ...
- DimensionNames=validDimensionNames);
- end
-
- function T = toMATLAB(obj)
- T = obj.table();
- end
-
- function tf = isequal(obj, varargin)
- tf = arrow.tabular.internal.isequal(obj, varargin{:});
+ obj@arrow.tabular.Tabular(proxy);
end
function export(obj, cArrowArrayAddress, cArrowSchemaAddress)
@@ -115,23 +40,21 @@ classdef RecordBatch < matlab.mixin.CustomDisplay & ...
);
obj.Proxy.exportToC(args);
end
- end
- methods (Access = private)
- function str = toString(obj)
- str = obj.Proxy.toString();
- end
end
methods (Access=protected)
- function displayScalarObject(obj)
- className = matlab.mixin.CustomDisplay.getClassNameForHeader(obj);
- tabularDisplay =
arrow.tabular.internal.display.getTabularDisplay(obj, className);
- disp(tabularDisplay + newline);
+
+ function column = constructColumnFromProxy(~, proxyInfo)
+ traits = arrow.type.traits.traits(arrow.type.ID(proxyInfo.TypeID));
+ proxy = libmexclass.proxy.Proxy(Name=traits.ArrayProxyClassName,
ID=proxyInfo.ProxyID);
+ column = traits.ArrayConstructor(proxy);
end
+
end
methods (Static, Access=public)
+
function recordBatch = fromArrays(arrowArrays, opts)
arguments(Repeating)
arrowArrays(1, 1) arrow.array.Array
@@ -163,5 +86,7 @@ classdef RecordBatch < matlab.mixin.CustomDisplay & ...
importer = arrow.c.internal.RecordBatchImporter();
recordBatch = importer.import(cArray, cSchema);
end
+
end
+
end
diff --git a/matlab/src/matlab/+arrow/+tabular/Table.m
b/matlab/src/matlab/+arrow/+tabular/Table.m
index 48e6b09edc..f574c8c3af 100644
--- a/matlab/src/matlab/+arrow/+tabular/Table.m
+++ b/matlab/src/matlab/+arrow/+tabular/Table.m
@@ -1,6 +1,3 @@
-%TABLE A tabular data structure representing a set of
-% arrow.array.ChunkedArray objects with a fixed schema.
-
% Licensed to the Apache Software Foundation (ASF) under one or more
% contributor license agreements. See the NOTICE file distributed with
% this work for additional information regarding copyright ownership.
@@ -16,18 +13,7 @@
% implied. See the License for the specific language governing
% permissions and limitations under the License.
-classdef Table < matlab.mixin.CustomDisplay & matlab.mixin.Scalar
-
- properties (Dependent, SetAccess=private, GetAccess=public)
- NumRows
- NumColumns
- ColumnNames
- Schema
- end
-
- properties (Hidden, SetAccess=private, GetAccess=public)
- Proxy
- end
+classdef Table < arrow.tabular.Tabular
methods
@@ -36,88 +22,17 @@ classdef Table < matlab.mixin.CustomDisplay &
matlab.mixin.Scalar
proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy,
"arrow.tabular.proxy.Table")}
end
import arrow.internal.proxy.validate
- obj.Proxy = proxy;
- end
-
- function numColumns = get.NumColumns(obj)
- numColumns = obj.Proxy.getNumColumns();
- end
-
- function numRows = get.NumRows(obj)
- numRows = obj.Proxy.getNumRows();
- end
-
- function columnNames = get.ColumnNames(obj)
- columnNames = obj.Proxy.getColumnNames();
- end
-
- function schema = get.Schema(obj)
- proxyID = obj.Proxy.getSchema();
- proxy = libmexclass.proxy.Proxy(Name="arrow.tabular.proxy.Schema",
ID=proxyID);
- schema = arrow.tabular.Schema(proxy);
- end
-
- function chunkedArray = column(obj, idx)
- import arrow.internal.validate.*
-
- idx = index.numericOrString(idx, "int32", AllowNonScalar=false);
-
- if isnumeric(idx)
- args = struct(Index=idx);
- proxyID = obj.Proxy.getColumnByIndex(args);
- else
- args = struct(Name=idx);
- proxyID = obj.Proxy.getColumnByName(args);
- end
-
- proxy =
libmexclass.proxy.Proxy(Name="arrow.array.proxy.ChunkedArray", ID=proxyID);
- chunkedArray = arrow.array.ChunkedArray(proxy);
- end
-
- function T = table(obj)
- import arrow.tabular.internal.*
-
- numColumns = obj.NumColumns;
- matlabArrays = cell(1, numColumns);
-
- for ii = 1:numColumns
- chunkedArray = obj.column(ii);
- matlabArrays{ii} = toMATLAB(chunkedArray);
- end
-
- validVariableNames = makeValidVariableNames(obj.ColumnNames);
- validDimensionNames = makeValidDimensionNames(validVariableNames);
-
- T = table(matlabArrays{:}, ...
- VariableNames=validVariableNames, ...
- DimensionNames=validDimensionNames);
- end
-
- function T = toMATLAB(obj)
- T = obj.table();
- end
-
- function tf = isequal(obj, varargin)
- tf = arrow.tabular.internal.isequal(obj, varargin{:});
- end
-
- end
-
- methods (Access = private)
-
- function str = toString(obj)
- str = obj.Proxy.toString();
+ obj@arrow.tabular.Tabular(proxy);
end
end
methods (Access=protected)
- function displayScalarObject(obj)
- className = matlab.mixin.CustomDisplay.getClassNameForHeader(obj);
- tabularDisplay =
arrow.tabular.internal.display.getTabularDisplay(obj, className);
- disp(tabularDisplay + newline);
- end
+ function column = constructColumnFromProxy(~, proxyInfo)
+ proxy =
libmexclass.proxy.Proxy(Name="arrow.array.proxy.ChunkedArray", ID=proxyInfo);
+ column = arrow.array.ChunkedArray(proxy);
+ end
end
methods (Static, Access=public)
diff --git a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
b/matlab/src/matlab/+arrow/+tabular/Tabular.m
similarity index 56%
copy from matlab/src/matlab/+arrow/+tabular/RecordBatch.m
copy to matlab/src/matlab/+arrow/+tabular/Tabular.m
index 5c66c66c26..0617b1b612 100644
--- a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
+++ b/matlab/src/matlab/+arrow/+tabular/Tabular.m
@@ -1,6 +1,4 @@
-%RECORDBATCH A tabular data structure representing a set of
-%arrow.array.Array objects with a fixed schema.
-
+%TABULAR Interface that represents a tabular data structure.
% Licensed to the Apache Software Foundation (ASF) under one or more
% contributor license agreements. See the NOTICE file distributed with
@@ -17,8 +15,7 @@
% implied. See the License for the specific language governing
% permissions and limitations under the License.
-classdef RecordBatch < matlab.mixin.CustomDisplay & ...
- matlab.mixin.Scalar
+classdef Tabular < matlab.mixin.CustomDisplay & matlab.mixin.Scalar
properties (Dependent, SetAccess=private, GetAccess=public)
NumRows
@@ -30,24 +27,33 @@ classdef RecordBatch < matlab.mixin.CustomDisplay & ...
properties (Hidden, SetAccess=private, GetAccess=public)
Proxy
end
+
+ methods(Access=protected, Abstract)
+ % constructColumnFromProxy must construct an instance of the
+ % appropriate MATLAB class from the proxyInfo argument. The
+ % template method arrow.tabular.Tabular/column() invokes this
+ % method.
+ column = constructColumnFromProxy(obj, proxyInfo)
+ end
methods
- function obj = RecordBatch(proxy)
+
+ function obj = Tabular(proxy)
arguments
- proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy,
"arrow.tabular.proxy.RecordBatch")}
+ proxy(1, 1) libmexclass.proxy.Proxy
end
import arrow.internal.proxy.validate
obj.Proxy = proxy;
end
- function numRows = get.NumRows(obj)
- numRows = obj.Proxy.getNumRows();
- end
-
function numColumns = get.NumColumns(obj)
numColumns = obj.Proxy.getNumColumns();
end
+ function numRows = get.NumRows(obj)
+ numRows = obj.Proxy.getNumRows();
+ end
+
function columnNames = get.ColumnNames(obj)
columnNames = obj.Proxy.getColumnNames();
end
@@ -58,7 +64,7 @@ classdef RecordBatch < matlab.mixin.CustomDisplay & ...
schema = arrow.tabular.Schema(proxy);
end
- function arrowArray = column(obj, idx)
+ function array = column(obj, idx)
import arrow.internal.validate.*
idx = index.numericOrString(idx, "int32", AllowNonScalar=false);
@@ -70,10 +76,8 @@ classdef RecordBatch < matlab.mixin.CustomDisplay & ...
args = struct(Name=idx);
proxyInfo = obj.Proxy.getColumnByName(args);
end
-
- traits = arrow.type.traits.traits(arrow.type.ID(proxyInfo.TypeID));
- proxy = libmexclass.proxy.Proxy(Name=traits.ArrayProxyClassName,
ID=proxyInfo.ProxyID);
- arrowArray = traits.ArrayConstructor(proxy);
+
+ array = obj.constructColumnFromProxy(proxyInfo);
end
function T = table(obj)
@@ -81,10 +85,9 @@ classdef RecordBatch < matlab.mixin.CustomDisplay & ...
numColumns = obj.NumColumns;
matlabArrays = cell(1, numColumns);
-
+
for ii = 1:numColumns
- arrowArray = obj.column(ii);
- matlabArrays{ii} = toMATLAB(arrowArray);
+ matlabArrays{ii} = toMATLAB(obj.column(ii));
end
validVariableNames = makeValidVariableNames(obj.ColumnNames);
@@ -103,65 +106,24 @@ classdef RecordBatch < matlab.mixin.CustomDisplay & ...
tf = arrow.tabular.internal.isequal(obj, varargin{:});
end
- function export(obj, cArrowArrayAddress, cArrowSchemaAddress)
- arguments
- obj(1, 1) arrow.tabular.RecordBatch
- cArrowArrayAddress(1, 1) uint64
- cArrowSchemaAddress(1, 1) uint64
- end
- args = struct(...
- ArrowArrayAddress=cArrowArrayAddress,...
- ArrowSchemaAddress=cArrowSchemaAddress...
- );
- obj.Proxy.exportToC(args);
- end
end
methods (Access = private)
+
function str = toString(obj)
str = obj.Proxy.toString();
end
+
end
methods (Access=protected)
+
function displayScalarObject(obj)
className = matlab.mixin.CustomDisplay.getClassNameForHeader(obj);
tabularDisplay =
arrow.tabular.internal.display.getTabularDisplay(obj, className);
disp(tabularDisplay + newline);
end
- end
- methods (Static, Access=public)
- function recordBatch = fromArrays(arrowArrays, opts)
- arguments(Repeating)
- arrowArrays(1, 1) arrow.array.Array
- end
- arguments
- opts.ColumnNames(1, :) string {mustBeNonmissing} =
compose("Column%d", 1:numel(arrowArrays))
- end
-
- import arrow.tabular.internal.validateArrayLengths
- import arrow.tabular.internal.validateColumnNames
- import arrow.array.internal.getArrayProxyIDs
-
- numColumns = numel(arrowArrays);
- validateArrayLengths(arrowArrays);
- validateColumnNames(opts.ColumnNames, numColumns);
-
- arrayProxyIDs = getArrayProxyIDs(arrowArrays);
- args = struct(ArrayProxyIDs=arrayProxyIDs,
ColumnNames=opts.ColumnNames);
- proxyName = "arrow.tabular.proxy.RecordBatch";
- proxy = arrow.internal.proxy.create(proxyName, args);
- recordBatch = arrow.tabular.RecordBatch(proxy);
- end
-
- function recordBatch = import(cArray, cSchema)
- arguments
- cArray(1, 1) arrow.c.Array
- cSchema(1, 1) arrow.c.Schema
- end
- importer = arrow.c.internal.RecordBatchImporter();
- recordBatch = importer.import(cArray, cSchema);
- end
end
-end
+
+end
\ No newline at end of file