This is an automated email from the ASF dual-hosted git repository.
kevingurney pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 081e354e6f GH-37996: [MATLAB] Add a static constructor method named
`fromMATLAB` to `arrow.array.StructArray` (#37998)
081e354e6f is described below
commit 081e354e6fc9ded8f7d3d19d8a785cff31fa5cfb
Author: sgilmore10 <[email protected]>
AuthorDate: Tue Oct 3 16:35:43 2023 -0400
GH-37996: [MATLAB] Add a static constructor method named `fromMATLAB` to
`arrow.array.StructArray` (#37998)
### Rationale for this change
Right now, the only way to construct an `arrow.array.StructArray` is to
call its static `fromArrays` method. Doing so requires users to first
construct the individual field arrays before creating the `StructArray`.
```matlab
>> a1 = arrow.array([1 2 3 4]);
>> a2 = arrow.array(["A" "B" "C" "D"]);
>> s1 = arrow.array.StructArray.fromArrays(a1, a2, FieldNames=["Number"
"String"]);
>> class(s1)
ans =
'arrow.array.StructArray'
```
It would be nice if users could construct `StructArray`s from MATLAB
`table`s by either calling `arrow.array.StructArray.fromMATLAB()` or by passing
a `table` to `arrow.array()`:
```matlab
>> t = table([1 2 3 4]', ["A1" "A2" "A3" "A4"]', VariableNames=["Number",
"String"])
% Call fromMATLAB method
>> s1 = arrow.array.StructArray.fromMATLAB(t);
>> class(s1)
ans =
'arrow.array.StructArray'
% Pass table to arrow.array()
>> s2 = arrow.array(t);
>> class(s2)
ans =
'arrow.array.StructArray'
```
### What changes are included in this PR?
1. Added static constructor method `fromMATLAB` to
`arrow.array.StructArray`. It accepts a `table` as input and optionally two
name-value pairs: `FieldNames` and `Valid`.
2. Set the `ArrayStaticConstructor` property of
`arrow.type.traits.StructTraits` to `@arrow.array.StructArray.fromMATLAB`.
Previously, it was set to `missing`.
3. Updated `arrow.type.traits.traits(className)` to return `StructTraits`
if `className` is the string `"table"`.
4. Updated `arrow.array` to accept a MATLAB `table` as input and return an
`arrow.array.StructArray` if given a `table`.
5. Changed the signature of `arrow.array()` to accept `varargin` instead of
pre-determined name-value pairs. The name-value pairs accepted depend on the
type of array being constructed. For example, you can supply `TimeUnit` when
constructing an `arrow.array.TimestampArray`, but `TimeUnit` will not be
accepted when creating an `arrow.array.Int8Array`.
### Are these changes tested?
Yes. Added new test cases to `tArray.m`, `tStructArray.m`, `ttraits.m`,
and `tStructTraits.m`.
### Are there any user-facing changes?
Yes, users can now create `StructArray`s directly from MATLAB `table`s by
calling either `arrow.array()` or `arrow.array.StructArray.fromMATLAB`.
* Closes: #37996
Authored-by: Sarah Gilmore <[email protected]>
Signed-off-by: Kevin Gurney <[email protected]>
---
matlab/src/matlab/+arrow/+array/StructArray.m | 33 +++++++++
.../src/matlab/+arrow/+type/+traits/StructTraits.m | 5 +-
matlab/src/matlab/+arrow/+type/+traits/traits.m | 2 +
matlab/src/matlab/+arrow/array.m | 38 +++++-----
matlab/test/arrow/array/tArray.m | 5 +-
matlab/test/arrow/array/tStructArray.m | 86 ++++++++++++++++++++++
matlab/test/arrow/type/traits/tStructTraits.m | 2 +-
matlab/test/arrow/type/traits/ttraits.m | 11 +++
8 files changed, 154 insertions(+), 28 deletions(-)
diff --git a/matlab/src/matlab/+arrow/+array/StructArray.m
b/matlab/src/matlab/+arrow/+array/StructArray.m
index 589e39fecd..800e34fe74 100644
--- a/matlab/src/matlab/+arrow/+array/StructArray.m
+++ b/matlab/src/matlab/+arrow/+array/StructArray.m
@@ -142,5 +142,38 @@ classdef StructArray < arrow.array.Array
proxy = arrow.internal.proxy.create(proxyName, args);
array = arrow.array.StructArray(proxy);
end
+
+ function array = fromMATLAB(T, opts)
+ arguments
+ T table
+ opts.FieldNames(1, :) string {mustBeNonmissing} =
T.Properties.VariableNames
+ opts.Valid
+ end
+
+ import arrow.tabular.internal.decompose
+ import arrow.tabular.internal.validateColumnNames
+ import arrow.array.internal.getArrayProxyIDs
+ import arrow.internal.validate.parseValid
+
+ if width(T) == 0
+ % StructArrays require at least one field
+ error("arrow:struct:ZeroVariables", ...
+ "Input table T must have at least one variable.");
+ end
+
+ % If FieldNames was provided, make sure the number of field
+ % names is equal to the width of the table.
+ validateColumnNames(opts.FieldNames, width(T));
+
+ arrowArrays = decompose(T);
+ arrayProxyIDs = getArrayProxyIDs(arrowArrays);
+ validElements = parseValid(opts, height(T));
+
+ args = struct(ArrayProxyIDs=arrayProxyIDs, ...
+ FieldNames=opts.FieldNames, Valid=validElements);
+ proxyName = "arrow.array.proxy.StructArray";
+ proxy = arrow.internal.proxy.create(proxyName, args);
+ array = arrow.array.StructArray(proxy);
+ end
end
end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m
b/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m
index 0f8b7b3a2a..adab036f27 100644
--- a/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m
+++ b/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m
@@ -19,10 +19,7 @@ classdef StructTraits < arrow.type.traits.TypeTraits
ArrayConstructor = @arrow.array.StructArray
ArrayClassName = "arrow.array.StructArray"
ArrayProxyClassName = "arrow.array.proxy.StructArray"
-
- % TODO: Implement fromMATLAB
- ArrayStaticConstructor = missing
-
+ ArrayStaticConstructor = @arrow.array.StructArray.fromMATLAB
TypeConstructor = @arrow.type.StructType
TypeClassName = "arrow.type.StructType"
TypeProxyClassName = "arrow.type.proxy.StructType"
diff --git a/matlab/src/matlab/+arrow/+type/+traits/traits.m
b/matlab/src/matlab/+arrow/+type/+traits/traits.m
index f737108ce5..9badf63eeb 100644
--- a/matlab/src/matlab/+arrow/+type/+traits/traits.m
+++ b/matlab/src/matlab/+arrow/+type/+traits/traits.m
@@ -91,6 +91,8 @@ function typeTraits = traits(type)
typeTraits = TimestampTraits();
case "duration"
typeTraits = Time64Traits();
+ case "table"
+ typeTraits = StructTraits();
otherwise
error("arrow:type:traits:UnsupportedMatlabClass", "Unsupported
MATLAB class: " + type);
end
diff --git a/matlab/src/matlab/+arrow/array.m b/matlab/src/matlab/+arrow/array.m
index 983b3c8868..e34eb8b3fc 100644
--- a/matlab/src/matlab/+arrow/array.m
+++ b/matlab/src/matlab/+arrow/array.m
@@ -13,46 +13,42 @@
% implied. See the License for the specific language governing
% permissions and limitations under the License.
-function arrowArray = array(data, opts)
- arguments
- data
- opts.InferNulls(1, 1) logical = true
- opts.Valid
- end
+function arrowArray = array(data, varargin)
data = convertCellstrToString(data);
classname = string(class(data));
- args = namedargs2cell(opts);
switch (classname)
case "logical"
- arrowArray = arrow.array.BooleanArray.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.BooleanArray.fromMATLAB(data,
varargin{:});
case "uint8"
- arrowArray = arrow.array.UInt8Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.UInt8Array.fromMATLAB(data, varargin{:});
case "uint16"
- arrowArray = arrow.array.UInt16Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.UInt16Array.fromMATLAB(data, varargin{:});
case "uint32"
- arrowArray = arrow.array.UInt32Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.UInt32Array.fromMATLAB(data, varargin{:});
case "uint64"
- arrowArray = arrow.array.UInt64Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.UInt64Array.fromMATLAB(data, varargin{:});
case "int8"
- arrowArray = arrow.array.Int8Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.Int8Array.fromMATLAB(data, varargin{:});
case "int16"
- arrowArray = arrow.array.Int16Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.Int16Array.fromMATLAB(data, varargin{:});
case "int32"
- arrowArray = arrow.array.Int32Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.Int32Array.fromMATLAB(data, varargin{:});
case "int64"
- arrowArray = arrow.array.Int64Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.Int64Array.fromMATLAB(data, varargin{:});
case "single"
- arrowArray = arrow.array.Float32Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.Float32Array.fromMATLAB(data,
varargin{:});
case "double"
- arrowArray = arrow.array.Float64Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.Float64Array.fromMATLAB(data,
varargin{:});
case "string"
- arrowArray = arrow.array.StringArray.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.StringArray.fromMATLAB(data, varargin{:});
case "datetime"
- arrowArray = arrow.array.TimestampArray.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.TimestampArray.fromMATLAB(data,
varargin{:});
case "duration"
- arrowArray = arrow.array.Time64Array.fromMATLAB(data, args{:});
+ arrowArray = arrow.array.Time64Array.fromMATLAB(data, varargin{:});
+ case "table"
+ arrowArray = arrow.array.StructArray.fromMATLAB(data, varargin{:});
otherwise
errid = "arrow:array:UnsupportedMATLABType";
msg = join(["Unable to convert MATLAB type" classname "to arrow
array."]);
diff --git a/matlab/test/arrow/array/tArray.m b/matlab/test/arrow/array/tArray.m
index 54b31270b2..545d382ddf 100644
--- a/matlab/test/arrow/array/tArray.m
+++ b/matlab/test/arrow/array/tArray.m
@@ -32,7 +32,8 @@ classdef tArray < matlab.unittest.TestCase
{[1 2], "arrow.array.Float64Array"}, ...
{datetime(2022, 1, 1), "arrow.array.TimestampArray"}, ...
{seconds([1 2]), "arrow.array.Time64Array"}, ...
- {["A" "B"], "arrow.array.StringArray"}};
+ {["A" "B"], "arrow.array.StringArray"}, ...
+ {table(["A" "B"]'), "arrow.array.StructArray"}};
end
methods(Test)
@@ -50,7 +51,7 @@ classdef tArray < matlab.unittest.TestCase
% Verify arrow.array throws an error with the identifier
% "arrow:array:UnsupportedMATLABType" if the input array is not one
% we support converting into an Arrow array.
- matlabArray = table;
+ matlabArray = {table};
fcn = @() arrow.array(matlabArray);
errID = "arrow:array:UnsupportedMATLABType";
testCase.verifyError(fcn, errID);
diff --git a/matlab/test/arrow/array/tStructArray.m
b/matlab/test/arrow/array/tStructArray.m
index 639df65bef..83e902ee2f 100644
--- a/matlab/test/arrow/array/tStructArray.m
+++ b/matlab/test/arrow/array/tStructArray.m
@@ -273,5 +273,91 @@ classdef tStructArray < matlab.unittest.TestCase
tc.verifyFalse(isequal(array1, array3));
end
+ function FromMATLABBasic(tc)
+ % Verify StructArray.fromMATLAB returns the expected
+ % StructArray.
+ import arrow.array.StructArray
+
+ T = table([1 2]', ["A1" "A2"]', VariableNames=["Number" "String"]);
+ array = StructArray.fromMATLAB(T);
+ tc.verifyEqual(array.Length, int64(2));
+ tc.verifyEqual(array.NumFields, int32(2));
+ tc.verifyEqual(array.FieldNames, ["Number" "String"]);
+
+ field1 = arrow.array([1 2]');
+ field2 = arrow.array(["A1" "A2"]');
+
+ tc.verifyEqual(field1, array.field(1));
+ tc.verifyEqual(field2, array.field(2));
+ end
+
+ function FromMATLABFieldNames(tc)
+ % Verify StructArray.fromMATLAB returns the expected
+ % StructArray when the FieldNames nv-pair is supplied.
+ import arrow.array.StructArray
+
+ T = table([1 2]', ["A1" "A2"]', VariableNames=["Number" "String"]);
+ array = StructArray.fromMATLAB(T, FieldNames=["Custom" "Name"]);
+ tc.verifyEqual(array.Length, int64(2));
+ tc.verifyEqual(array.NumFields, int32(2));
+ tc.verifyEqual(array.FieldNames, ["Custom" "Name"]);
+ tc.verifyEqual(array.Valid, [true; true]);
+
+ field1 = arrow.array([1 2]');
+ field2 = arrow.array(["A1" "A2"]');
+
+ tc.verifyEqual(field1, array.field(1));
+ tc.verifyEqual(field2, array.field(2));
+ end
+
+ function FromMATLABValid(tc)
+ % Verify StructArray.fromMATLAB returns the expected
+ % StructArray when the Valid nv-pair is supplied.
+
+ import arrow.array.StructArray
+
+ T = table([1 2]', ["A1" "A2"]', VariableNames=["Number" "String"]);
+ array = StructArray.fromMATLAB(T, Valid=2);
+ tc.verifyEqual(array.Length, int64(2));
+ tc.verifyEqual(array.NumFields, int32(2));
+ tc.verifyEqual(array.FieldNames, ["Number" "String"]);
+ tc.verifyEqual(array.Valid, [false; true]);
+
+ field1 = arrow.array([1 2]');
+ field2 = arrow.array(["A1" "A2"]');
+
+ tc.verifyEqual(field1, array.field(1));
+ tc.verifyEqual(field2, array.field(2));
+ end
+
+ function FromMATLABZeroVariablesError(tc)
+ % Verify StructArray.fromMATLAB throws an error when the input
+ % table T has zero variables.
+ import arrow.array.StructArray
+
+ fcn = @() StructArray.fromMATLAB(table);
+ tc.verifyError(fcn, "arrow:struct:ZeroVariables");
+ end
+
+ function FromMATLABWrongNumberFieldNames(tc)
+ % Verify StructArray.fromMATLAB throws an error when the
+ % FieldNames nv-pair is provided and its number of elements
+ % does not equal the number of variables in the input table T.
+
+ import arrow.array.StructArray
+
+ fcn = @() StructArray.fromMATLAB(table(1), FieldNames=["A" "B"]);
+ tc.verifyError(fcn, "arrow:tabular:WrongNumberColumnNames");
+ end
+
+ function FromMATLABValidNVPairBadIndex(tc)
+ % Verify StructArray.fromMATLAB throws an error when the
+ % Valid nv-pair is provided and it contains an invalid index.
+
+ import arrow.array.StructArray
+
+ fcn = @() StructArray.fromMATLAB(table(1), Valid=2);
+ tc.verifyError(fcn, "MATLAB:notLessEqual");
+ end
end
end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tStructTraits.m
b/matlab/test/arrow/type/traits/tStructTraits.m
index 07833aca16..dad9ec012f 100644
--- a/matlab/test/arrow/type/traits/tStructTraits.m
+++ b/matlab/test/arrow/type/traits/tStructTraits.m
@@ -20,7 +20,7 @@ classdef tStructTraits < hTypeTraits
ArrayConstructor = @arrow.array.StructArray
ArrayClassName = "arrow.array.StructArray"
ArrayProxyClassName = "arrow.array.proxy.StructArray"
- ArrayStaticConstructor = missing
+ ArrayStaticConstructor = @arrow.array.StructArray.fromMATLAB
TypeConstructor = @arrow.type.StructType
TypeClassName = "arrow.type.StructType"
TypeProxyClassName = "arrow.type.proxy.StructType"
diff --git a/matlab/test/arrow/type/traits/ttraits.m
b/matlab/test/arrow/type/traits/ttraits.m
index 2880645f29..d2d80b3f8f 100644
--- a/matlab/test/arrow/type/traits/ttraits.m
+++ b/matlab/test/arrow/type/traits/ttraits.m
@@ -365,6 +365,17 @@ classdef ttraits < matlab.unittest.TestCase
testCase.verifyEqual(actualTraits, expectedTraits);
end
+ function TestMatlabTable(testCase)
+ import arrow.type.traits.*
+
+ type = "table";
+ expectedTraits = StructTraits();
+
+ actualTraits = traits(type);
+
+ testCase.verifyEqual(actualTraits, expectedTraits);
+ end
+
function TestErrorIfUnsupportedMatlabClass(testCase)
import arrow.type.traits.*