This is an automated email from the ASF dual-hosted git repository.

kevingurney pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 71329ce33a GH-37042: [MATLAB] Implement Feather V1 Writer using new 
MATLAB Interface APIs (#37043)
71329ce33a is described below

commit 71329ce33a18a53e322514d0e463677ebad648c9
Author: sgilmore10 <74676073+sgilmor...@users.noreply.github.com>
AuthorDate: Mon Aug 7 15:22:03 2023 -0400

    GH-37042: [MATLAB] Implement Feather V1 Writer using new MATLAB Interface 
APIs (#37043)
    
    
    
    ### Rationale for this change
    
    Now that we have the basic building blocks for tabular IO in the MATLAB 
Interface (`Array`, `Schema`, `RecordBatch`), we can implement a Feather V1 
writer in terms of the new APIs.
    
    This is the first in a series of pull requests in which we will work on 
replacing the legacy feather V1 infrastructure with a new implementation that 
uses the MATLAB Interface APIs. A side effect of doing this work is that we can 
eventually delete a lot of legacy build infrastructure and code.
    
    ### What changes are included in this PR?
    
    1. Added a new class called `arrow.internal.io.feather.Writer` which can be 
used to write feather V1 files. It has one public property named `Filename` and 
one public method `write`.
    
    Below is an example of its usage:
    
    ```matlab
    >> T = table([1; 2; 3], single([10; 11; 12]));
    
    T =
    
      3×2 table
    
        Var1    Var2
        ____    ____
    
         1       10
         2       11
         3       12
    
    >> filename = "/tmp/table.feather";
    >> writer = arrow.internal.io.feather.Writer(filename)
    
    writer =
    
      Writer with properties:
    
        Filename: "/tmp/table.feather"
    
    >> writer.write(T);
    
    ```
    
    2. Added an `unwrap` method to `proxy::RecordBatch` so that the 
`FeatherWriter::write` method can access the underlying `RecordBatch` from the 
proxy.
    3.  Changed the `SetAccess` and `GetAccess` of the `Proxy` property on 
`arrow.tabular.RecordBatch` to `private` and `public`, respectively.
    
    ### Are these changes tested?
    
    Yes, added a new test file called `tRoundTrip.m` in the 
`matlab/test/arrow/io/feather` folder.
    
    ### Are there any user-facing changes?
    
    No.
    
    ### Future Directions
    
    1. Add a new class for reading feather V1 files (See #37041).
    2. Integrate this class in the public `featherwrite` function.
    3. Once this class is integrated with `featherwrite`, we can delete the 
legacy build infrastructure and source code.
    * Closes: #37042
    
    Authored-by: Sarah Gilmore <sgilm...@mathworks.com>
    Signed-off-by: Kevin Gurney <kgur...@mathworks.com>
---
 matlab/src/cpp/arrow/matlab/error/error.h          |  4 +
 .../matlab/io/feather/proxy/feather_writer.cc      | 90 ++++++++++++++++++++++
 .../feather/proxy/feather_writer.h}                | 24 +++---
 matlab/src/cpp/arrow/matlab/proxy/factory.cc       |  2 +
 .../cpp/arrow/matlab/tabular/proxy/record_batch.cc |  4 +
 .../cpp/arrow/matlab/tabular/proxy/record_batch.h  |  2 +
 .../matlab/+arrow/+internal/+io/+feather/Writer.m  | 48 ++++++++++++
 matlab/src/matlab/+arrow/+tabular/RecordBatch.m    |  2 +-
 matlab/test/arrow/io/feather/tRoundTrip.m          | 52 +++++++++++++
 matlab/tools/cmake/BuildMatlabArrowInterface.cmake |  4 +-
 10 files changed, 217 insertions(+), 15 deletions(-)

diff --git a/matlab/src/cpp/arrow/matlab/error/error.h 
b/matlab/src/cpp/arrow/matlab/error/error.h
index b7c0d7d696..e1d2982f28 100644
--- a/matlab/src/cpp/arrow/matlab/error/error.h
+++ b/matlab/src/cpp/arrow/matlab/error/error.h
@@ -180,4 +180,8 @@ namespace arrow::matlab::error {
     static const char* UNKNOWN_PROXY_FOR_ARRAY_TYPE = 
"arrow:array:UnknownProxyForArrayType";
     static const char* RECORD_BATCH_NUMERIC_INDEX_WITH_EMPTY_RECORD_BATCH = 
"arrow:tabular:recordbatch:NumericIndexWithEmptyRecordBatch";
     static const char* RECORD_BATCH_INVALID_NUMERIC_COLUMN_INDEX = 
"arrow:tabular:recordbatch:InvalidNumericColumnIndex";
+    static const char* FAILED_TO_OPEN_FILE_FOR_WRITE = 
"arrow:io:FailedToOpenFileForWrite";
+    static const char* FEATHER_FAILED_TO_WRITE_TABLE = 
"arrow:io:feather:FailedToWriteTable";
+    static const char* TABLE_FROM_RECORD_BATCH = "arrow:table:FromRecordBatch";
+
 }
diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc 
b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc
new file mode 100644
index 0000000000..a27e1fb0e6
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/matlab/io/feather/proxy/feather_writer.h"
+#include "arrow/matlab/tabular/proxy/record_batch.h"
+#include "arrow/matlab/error/error.h"
+
+#include "arrow/result.h"
+#include "arrow/table.h"
+#include "arrow/util/utf8.h"
+
+#include "arrow/io/file.h"
+#include "arrow/ipc/feather.h"
+
+#include "libmexclass/proxy/ProxyManager.h"
+
+namespace arrow::matlab::io::feather::proxy {
+
+    FeatherWriter::FeatherWriter(const std::string& filename) : 
filename{filename} {
+        REGISTER_METHOD(FeatherWriter, getFilename);
+        REGISTER_METHOD(FeatherWriter, write);
+    }
+
+    libmexclass::proxy::MakeResult FeatherWriter::make(const 
libmexclass::proxy::FunctionArguments& constructor_arguments) {
+        namespace mda = ::matlab::data;
+        mda::StructArray opts = constructor_arguments[0];
+        const mda::StringArray filename_mda = opts[0]["Filename"];
+
+        const auto filename_utf16 = std::u16string(filename_mda[0]);
+        MATLAB_ASSIGN_OR_ERROR(const auto filename_utf8,
+                               arrow::util::UTF16StringToUTF8(filename_utf16),
+                               error::UNICODE_CONVERSION_ERROR_ID);
+        
+        return std::make_shared<FeatherWriter>(filename_utf8);
+    }
+
+    void FeatherWriter::getFilename(libmexclass::proxy::method::Context& 
context) {
+        namespace mda = ::matlab::data;
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto utf16_filename,
+                                            
arrow::util::UTF8StringToUTF16(filename), 
+                                            context,
+                                            
error::UNICODE_CONVERSION_ERROR_ID);
+        mda::ArrayFactory factory;
+        auto str_mda = factory.createScalar(utf16_filename);
+        context.outputs[0] = str_mda;
+    }
+
+    void FeatherWriter::write(libmexclass::proxy::method::Context& context) {
+        namespace mda = ::matlab::data;
+        mda::StructArray opts = context.inputs[0];
+        const mda::TypedArray<uint64_t> record_batch_proxy_id_mda = 
opts[0]["RecordBatchProxyID"];
+        const uint64_t record_batch_proxy_id = record_batch_proxy_id_mda[0]; 
+
+        auto proxy = 
libmexclass::proxy::ProxyManager::getProxy(record_batch_proxy_id);
+        auto record_batch_proxy = 
std::static_pointer_cast<arrow::matlab::tabular::proxy::RecordBatch>(proxy);
+        auto record_batch = record_batch_proxy->unwrap();
+        
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto table, 
+                                            
arrow::Table::FromRecordBatches({record_batch}),
+                                            context,
+                                            error::TABLE_FROM_RECORD_BATCH);
+
+        
MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(std::shared_ptr<arrow::io::OutputStream> 
output_stream,
+                                            
arrow::io::FileOutputStream::Open(filename),
+                                            context,
+                                            
error::FAILED_TO_OPEN_FILE_FOR_WRITE);
+
+         // Specify the feather file format version as V1
+        arrow::ipc::feather::WriteProperties write_props;
+        write_props.version = arrow::ipc::feather::kFeatherV1Version;
+
+        MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(ipc::feather::WriteTable(*table, 
output_stream.get(), write_props),
+                                            context,
+                                            
error::FEATHER_FAILED_TO_WRITE_TABLE);
+    }
+}
diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h 
b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h
similarity index 59%
copy from matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
copy to matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h
index b5d741060a..dadb479887 100644
--- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
+++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h
@@ -17,27 +17,25 @@
 
 #pragma once
 
-#include "arrow/record_batch.h"
+#include "arrow/status.h"
 
 #include "libmexclass/proxy/Proxy.h"
 
-namespace arrow::matlab::tabular::proxy {
+namespace arrow::matlab::io::feather::proxy {
 
-    class RecordBatch : public libmexclass::proxy::Proxy {
+    class FeatherWriter : public libmexclass::proxy::Proxy {
         public:
-            RecordBatch(std::shared_ptr<arrow::RecordBatch> record_batch);
+            FeatherWriter(const std::string& filename);
         
-            virtual ~RecordBatch() {}
+            ~FeatherWriter() {}
 
-            static libmexclass::proxy::MakeResult make(const 
libmexclass::proxy::FunctionArguments& constructor_arguments);
-    
+            static libmexclass::proxy::MakeResult make(const 
libmexclass::proxy::FunctionArguments& constructor_arguments);  
+        
         protected:
-            void toString(libmexclass::proxy::method::Context& context);
-            void numColumns(libmexclass::proxy::method::Context& context);
-            void columnNames(libmexclass::proxy::method::Context& context);
-            void getColumnByIndex(libmexclass::proxy::method::Context& 
context);
+            void getFilename(libmexclass::proxy::method::Context& context);
+            void write(libmexclass::proxy::method::Context& context);
 
-            std::shared_ptr<arrow::RecordBatch> record_batch;
+        private:
+            const std::string filename; 
     };
-
 }
diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc 
b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
index 7d18c6c6b6..7a2a4f3192 100644
--- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc
+++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
@@ -25,6 +25,7 @@
 #include "arrow/matlab/type/proxy/string_type.h"
 #include "arrow/matlab/type/proxy/timestamp_type.h"
 #include "arrow/matlab/type/proxy/field.h"
+#include "arrow/matlab/io/feather/proxy/feather_writer.h"
 
 #include "factory.h"
 
@@ -60,6 +61,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const 
ClassName& class_name,
     REGISTER_PROXY(arrow.type.proxy.BooleanType    , 
arrow::matlab::type::proxy::PrimitiveCType<bool>);
     REGISTER_PROXY(arrow.type.proxy.StringType     , 
arrow::matlab::type::proxy::StringType);
     REGISTER_PROXY(arrow.type.proxy.TimestampType  , 
arrow::matlab::type::proxy::TimestampType);
+    REGISTER_PROXY(arrow.io.feather.proxy.FeatherWriter  , 
arrow::matlab::io::feather::proxy::FeatherWriter);
 
     return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not 
find matching C++ proxy for " + class_name};
 };
diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc 
b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc
index ed30472f6c..e159e926ec 100644
--- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc
+++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc
@@ -56,6 +56,10 @@ namespace arrow::matlab::tabular::proxy {
         REGISTER_METHOD(RecordBatch, getColumnByIndex);
     }
 
+    std::shared_ptr<arrow::RecordBatch> RecordBatch::unwrap() {
+        return record_batch;
+    }
+
     void RecordBatch::toString(libmexclass::proxy::method::Context& context) {
         namespace mda = ::matlab::data;
         MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto utf16_string, 
arrow::util::UTF8StringToUTF16(record_batch->ToString()), context, 
error::UNICODE_CONVERSION_ERROR_ID);
diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h 
b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
index b5d741060a..b8c038816b 100644
--- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
+++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h
@@ -29,6 +29,8 @@ namespace arrow::matlab::tabular::proxy {
         
             virtual ~RecordBatch() {}
 
+            std::shared_ptr<arrow::RecordBatch> unwrap();
+
             static libmexclass::proxy::MakeResult make(const 
libmexclass::proxy::FunctionArguments& constructor_arguments);
     
         protected:
diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m 
b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m
new file mode 100644
index 0000000000..470c41fd5b
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m
@@ -0,0 +1,48 @@
+%WRITER Class for writing feather V1 files.
+
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements.  See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License.  You may obtain a copy of the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied.  See the License for the specific language governing
+% permissions and limitations under the License.
+classdef Writer < matlab.mixin.Scalar
+
+    properties(Hidden, SetAccess=private, GetAccess=public)
+        Proxy
+    end
+
+    properties(Dependent)
+        Filename
+    end
+
+    methods
+        function obj = Writer(filename)
+            arguments
+                filename(1, 1) {mustBeNonmissing, mustBeNonzeroLengthText}
+            end
+
+            args = struct(Filename=filename);
+            proxyName = "arrow.io.feather.proxy.FeatherWriter";
+            obj.Proxy = arrow.internal.proxy.create(proxyName, args);
+        end
+
+        function write(obj, T)
+            rb = arrow.recordbatch(T);
+            args = struct(RecordBatchProxyID=rb.Proxy.ID);
+            obj.Proxy.write(args);
+        end
+
+        function filename = get.Filename(obj)
+            filename = obj.Proxy.getFilename();
+        end
+    end
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m 
b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
index 0d002797f0..be5eee7d89 100644
--- a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
+++ b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
@@ -23,7 +23,7 @@ classdef RecordBatch < matlab.mixin.CustomDisplay & ...
         ColumnNames
     end
 
-    properties (Access=protected)
+    properties (Hidden, SetAccess=private, GetAccess=public)
         Proxy
     end
 
diff --git a/matlab/test/arrow/io/feather/tRoundTrip.m 
b/matlab/test/arrow/io/feather/tRoundTrip.m
new file mode 100644
index 0000000000..d56152be6d
--- /dev/null
+++ b/matlab/test/arrow/io/feather/tRoundTrip.m
@@ -0,0 +1,52 @@
+%TROUNDTRIP Round trip tests for feather.
+
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements.  See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License.  You may obtain a copy of the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied.  See the License for the specific language governing
+% permissions and limitations under the License.
+classdef tRoundTrip < matlab.unittest.TestCase
+
+    methods(TestClassSetup)
+        % Delete once arrow.internal.io.feather.Reader is submitted.
+        function addFeatherFunctionsToMATLABPath(testCase)
+            import matlab.unittest.fixtures.PathFixture
+            % Add Feather test utilities to the MATLAB path.
+            testCase.applyFixture(PathFixture('../../../util'));
+            % arrow.cpp.call must be on the MATLAB path.
+            testCase.assertTrue(~isempty(which('arrow.cpp.call')), ...
+                '''arrow.cpp.call'' must be on the MATLAB path. Use 
''addpath'' to add folders to the MATLAB path.');
+        end
+    end
+
+    methods(Test)
+        function Basic(testCase)
+            import matlab.unittest.fixtures.TemporaryFolderFixture
+            
+            fixture = testCase.applyFixture(TemporaryFolderFixture);
+            filename = fullfile(fixture.Folder, "temp.feather");
+
+            DoubleVar = [10; 20; 30; 40];
+            SingleVar = single([10; 15; 20; 25]);
+            tWrite = table(DoubleVar, SingleVar);
+            
+            featherwrite(tWrite, filename);
+            tRead = featherread(filename);
+            testCase.verifyEqual(tWrite, tRead);
+        end
+    end
+end
+
+function featherwrite(T, filename)
+    writer = arrow.internal.io.feather.Writer(filename);
+    writer.write(T);
+end
\ No newline at end of file
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake 
b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index f4696cfad2..1d57999417 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -55,7 +55,9 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES 
"${CMAKE_SOURCE_DIR}/src/cpp/a
                                                   
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/string_type.cc"
                                                   
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc"
                                                   
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/field.cc"
-                                                  
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc")
+                                                  
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc"
+                                                  
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc")
+
 
 
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR 
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")

Reply via email to