wesm commented on a change in pull request #10305:
URL: https://github.com/apache/arrow/pull/10305#discussion_r657146588



##########
File path: matlab/src/feather_reader.cc
##########
@@ -177,32 +182,34 @@ Status FeatherReader::Open(const std::string& filename,
   *feather_reader = std::shared_ptr<FeatherReader>(new FeatherReader());
 
   // Open file with given filename as a ReadableFile.
-  std::shared_ptr<io::ReadableFile> readable_file(nullptr);
-
-  RETURN_NOT_OK(io::ReadableFile::Open(filename, &readable_file));
+  arrow::Result<std::shared_ptr<io::ReadableFile>> maybe_readable_file =
+      io::ReadableFile::Open(filename);
+  RETURN_NOT_OK(maybe_readable_file);
 
   // TableReader expects a RandomAccessFile.
-  std::shared_ptr<io::RandomAccessFile> random_access_file(readable_file);
+  std::shared_ptr<io::RandomAccessFile> random_access_file{
+      maybe_readable_file.ValueOrDie()};

Review comment:
       Why not use `ARROW_ASSIGN_OR_RAISE` here?

##########
File path: matlab/CMakeLists.txt
##########
@@ -15,7 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
-cmake_minimum_required(VERSION 3.2)
+cmake_minimum_required(VERSION 3.20)

Review comment:
       This seems a bit high, is it necessary?

##########
File path: matlab/src/feather_writer.cc
##########
@@ -316,22 +331,46 @@ Status FeatherWriter::WriteVariables(const mxArray* 
variables) {
     std::string name_str = internal::MxArrayToString(name);
     std::string type_str = internal::MxArrayToString(type);
 
+    std::shared_ptr<arrow::DataType> datatype =
+        internal::ConvertMatlabTypeStringToArrowDataType(type_str);
+    std::shared_ptr<arrow::Field> field =
+        std::make_shared<arrow::Field>(name_str, datatype);
+
+    arrow::Result<std::shared_ptr<ResizableBuffer>> maybe_buffer =
+        arrow::AllocateResizableBuffer(internal::BitPackedLength(num_rows_));
+    RETURN_NOT_OK(maybe_buffer);
+    std::shared_ptr<ResizableBuffer> validity_bitmap = 
maybe_buffer.ValueOrDie();

Review comment:
       ARROW_ASSIGN_OR_RAISE?

##########
File path: matlab/src/featherwritemex.cc
##########
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <string>
-
 #include <mex.h>
 
+#include <string>
+

Review comment:
       ?

##########
File path: matlab/src/feather_reader.cc
##########
@@ -242,16 +245,34 @@ mxArray* FeatherReader::ReadVariables() const {
   mxArray* variables =
       mxCreateStructMatrix(1, num_variables_, num_variable_fields, fieldnames);
 
-  // Read all the table variables in the Feather file into memory.
+  // Read the entire table in the Feather file into memory.
+  std::shared_ptr<arrow::Table> table = nullptr;
+  arrow::Status status = reader_->Read(&table);
+  if (!status.ok()) {
+    mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::FailedToReadTable",
+                      "Failed to read arrow::Table from Feather file.");

Review comment:
       do you want to append the Status message? You might consider having a 
utility to adorn a status with additional Matlab-specific error message to make 
this pattern simpler

##########
File path: matlab/src/feather_writer.h
##########
@@ -17,40 +17,36 @@
 
 #pragma once
 
-#include <memory>
-#include <string>
-
 #include <arrow/ipc/feather.h>
 #include <arrow/status.h>
 #include <arrow/type.h>
-
 #include <matrix.h>
 
+#include <memory>
+#include <string>

Review comment:
       Is there a reason to move these here?

##########
File path: matlab/src/feather_writer.cc
##########
@@ -316,22 +331,46 @@ Status FeatherWriter::WriteVariables(const mxArray* 
variables) {
     std::string name_str = internal::MxArrayToString(name);
     std::string type_str = internal::MxArrayToString(type);
 
+    std::shared_ptr<arrow::DataType> datatype =
+        internal::ConvertMatlabTypeStringToArrowDataType(type_str);
+    std::shared_ptr<arrow::Field> field =
+        std::make_shared<arrow::Field>(name_str, datatype);
+
+    arrow::Result<std::shared_ptr<ResizableBuffer>> maybe_buffer =
+        arrow::AllocateResizableBuffer(internal::BitPackedLength(num_rows_));
+    RETURN_NOT_OK(maybe_buffer);
+    std::shared_ptr<ResizableBuffer> validity_bitmap = 
maybe_buffer.ValueOrDie();
+
     // Populate bit-packed arrow::Buffer using validity data in the mxArray*.
     internal::BitPackBuffer(valid, validity_bitmap);
 
     // Wrap mxArray data in an arrow::Array of the equivalent type.
-    std::unique_ptr<Array> array =
+    std::shared_ptr<Array> array =
         internal::WriteVariableData(data, type_str, validity_bitmap);
 
     // Verify that the arrow::Array has the right number of elements.
-    internal::ValidateNumRows(array->length(), this->num_rows_);
+    internal::ValidateNumRows(array->length(), num_rows_);
+
+    // Append the field to the schema builder
+    RETURN_NOT_OK(schema_builder.AddField(field));
 
-    // Write another column to the Feather file.
-    ARROW_RETURN_NOT_OK(this->table_writer_->Append(name_str, *array));
+    // Store the table column
+    table_columns.push_back(array);
   }
+  // Create the table schema
+  arrow::Result<std::shared_ptr<arrow::Schema>> table_schema_result =
+      schema_builder.Finish();
+  RETURN_NOT_OK(table_schema_result);
+
+  std::shared_ptr<arrow::Schema> table_schema = 
table_schema_result.ValueOrDie();

Review comment:
       ARROW_ASSIGN_OR_RAISE?

##########
File path: matlab/src/feather_writer.cc
##########
@@ -26,18 +24,53 @@
 #include <arrow/status.h>
 #include <arrow/table.h>
 #include <arrow/type.h>
-#include <arrow/util/bit-util.h>
-
+#include <arrow/util/bit_util.h>
+#include <arrow/util/bitmap_generate.h>
+#include <arrow/util/key_value_metadata.h>
 #include <mex.h>
 
-#include "feather_writer.h"
+#include <cmath>
+#include <functional> /* for std::multiplies */
+#include <numeric>    /* for std::accumulate */

Review comment:
       We generally put stdlib includes before third party includes

##########
File path: matlab/src/featherreadmex.cc
##########
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <string>
-
 #include <mex.h>
 
+#include <string>
+

Review comment:
       reason for this change?

##########
File path: matlab/src/feather_writer.cc
##########
@@ -248,60 +279,44 @@ Status FeatherWriter::Open(const std::string& filename,
   *feather_writer = std::shared_ptr<FeatherWriter>(new FeatherWriter());
 
   // Open a FileOutputStream corresponding to the provided filename.
-  std::shared_ptr<io::OutputStream> writable_file(nullptr);
-  ARROW_RETURN_NOT_OK(io::FileOutputStream::Open(filename, &writable_file));
-
-  // TableWriter::Open expects a shared_ptr to an OutputStream.
-  // Open the Feather file for writing with a TableWriter.
-  return ipc::feather::TableWriter::Open(writable_file,
-                                         &(*feather_writer)->table_writer_);
-}
-
-// Write table metadata to the Feather file from a mxArray*.
-void FeatherWriter::WriteMetadata(const mxArray* metadata) {
-  // Verify that all required fieldnames are provided.
-  internal::ValidateMxStructField(metadata, "Description", mxCHAR_CLASS, true);
-  internal::ValidateMxStructField(metadata, "NumRows", mxDOUBLE_CLASS, false);
-  internal::ValidateMxStructField(metadata, "NumVariables", mxDOUBLE_CLASS, 
false);
-
-  // Convert Description to a std::string and set on FeatherWriter and 
TableWriter.
-  std::string description =
-      internal::MxArrayToString(mxGetField(metadata, 0, "Description"));
-  this->description_ = description;
-  this->table_writer_->SetDescription(description);
-
-  // Get the NumRows field in the struct array and set on TableWriter.
-  this->num_rows_ = static_cast<int64_t>(mxGetScalar(mxGetField(metadata, 0, 
"NumRows")));
-  this->table_writer_->SetNumRows(this->num_rows_);
+  arrow::Result<std::shared_ptr<arrow::io::OutputStream>> 
maybe_file_output_stream =
+      io::FileOutputStream::Open(filename, 
&((*feather_writer)->file_output_stream_));
+  RETURN_NOT_OK(maybe_file_output_stream);
+  (*feather_writer)->file_output_stream_ = 
maybe_file_output_stream.ValueOrDie();

Review comment:
       ARROW_ASSIGN_OR_RAISE?

##########
File path: matlab/src/feather_reader.cc
##########
@@ -177,32 +182,34 @@ Status FeatherReader::Open(const std::string& filename,
   *feather_reader = std::shared_ptr<FeatherReader>(new FeatherReader());
 
   // Open file with given filename as a ReadableFile.
-  std::shared_ptr<io::ReadableFile> readable_file(nullptr);
-
-  RETURN_NOT_OK(io::ReadableFile::Open(filename, &readable_file));
+  arrow::Result<std::shared_ptr<io::ReadableFile>> maybe_readable_file =
+      io::ReadableFile::Open(filename);
+  RETURN_NOT_OK(maybe_readable_file);
 
   // TableReader expects a RandomAccessFile.
-  std::shared_ptr<io::RandomAccessFile> random_access_file(readable_file);
+  std::shared_ptr<io::RandomAccessFile> random_access_file{
+      maybe_readable_file.ValueOrDie()};
 
   // Open the Feather file for reading with a TableReader.
-  RETURN_NOT_OK(ipc::feather::TableReader::Open(random_access_file,
-                                                
&(*feather_reader)->table_reader_));
-
-  // Read the table metadata from the Feather file.
-  (*feather_reader)->num_rows_ = (*feather_reader)->table_reader_->num_rows();
-  (*feather_reader)->num_variables_ = 
(*feather_reader)->table_reader_->num_columns();
-  (*feather_reader)->description_ =
-      (*feather_reader)->table_reader_->HasDescription()
-          ? (*feather_reader)->table_reader_->GetDescription()
-          : "";
-
-  if ((*feather_reader)->num_rows_ > internal::MAX_MATLAB_SIZE ||
-      (*feather_reader)->num_variables_ > internal::MAX_MATLAB_SIZE) {
-    mexErrMsgIdAndTxt("MATLAB:arrow:SizeTooLarge",
-                      "The table size exceeds MATLAB limits: %u x %u",
-                      (*feather_reader)->num_rows_, 
(*feather_reader)->num_variables_);
+  arrow::Result<std::shared_ptr<ipc::feather::Reader>> maybe_reader =
+      ipc::feather::Reader::Open(random_access_file);
+  RETURN_NOT_OK(maybe_reader);
+
+  // Set the internal reader_ object.
+  (*feather_reader)->reader_ = maybe_reader.ValueOrDie();

Review comment:
       `ARROW_ASSIGN_OR_RAISE`?

##########
File path: matlab/src/feather_reader.cc
##########
@@ -242,16 +245,34 @@ mxArray* FeatherReader::ReadVariables() const {
   mxArray* variables =
       mxCreateStructMatrix(1, num_variables_, num_variable_fields, fieldnames);
 
-  // Read all the table variables in the Feather file into memory.
+  // Read the entire table in the Feather file into memory.
+  std::shared_ptr<arrow::Table> table = nullptr;

Review comment:
       redundant




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to