zeroshade commented on code in PR #40064:
URL: https://github.com/apache/arrow/pull/40064#discussion_r1508162851
##########
cpp/src/arrow/record_batch.cc:
##########
@@ -247,6 +248,97 @@ Result<std::shared_ptr<StructArray>>
RecordBatch::ToStructArray() const {
/*offset=*/0);
}
+template <typename DataType>
+inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out) {
+ using CType = typename arrow::TypeTraits<DataType>::CType;
+ auto* out_values = reinterpret_cast<CType*>(out);
+
+ // Loop through all of the columns
+ for (int i = 0; i < batch.num_columns(); ++i) {
+ const auto* in_values = batch.column(i)->data()->GetValues<CType>(1);
+
+ // Copy data of each column
+ memcpy(out_values, in_values, sizeof(CType) * batch.num_rows());
+ out_values += batch.num_rows();
+ } // End loop through columns
+}
+
+Result<std::shared_ptr<Tensor>> RecordBatch::ToTensor(MemoryPool* pool) const {
+ if (num_columns() == 0) {
+ return Status::TypeError(
+ "Conversion to Tensor for RecordBatches without columns/schema is not "
+ "supported.");
+ }
+ const auto& type = column(0)->type();
+ // Check for supported data types
+ if (!is_integer(type->id()) && !is_floating(type->id())) {
+ return Status::TypeError("DataType is not supported: ", type->ToString());
+ }
+ // Check for uniform data type
+ // Check for no validity bitmap of each field
+ for (int i = 0; i < num_columns(); ++i) {
+ if (column(i)->null_count() > 0) {
+ return Status::TypeError("Can only convert a RecordBatch with no
nulls.");
+ }
+ if (column(i)->type() != type) {
+ return Status::TypeError("Can only convert a RecordBatch with uniform
data type.");
+ }
+ }
+
+ // Allocate memory
+ ARROW_ASSIGN_OR_RAISE(
+ std::shared_ptr<Buffer> result,
+ AllocateBuffer(type->bit_width() * num_columns() * num_rows(), pool));
+ // Copy data
+ switch (type->id()) {
Review Comment:
Should we also support the temporal types that are backed by integral types,
such as DATE32, DATE64, etc.?
##########
cpp/src/arrow/record_batch.cc:
##########
@@ -247,6 +248,97 @@ Result<std::shared_ptr<StructArray>>
RecordBatch::ToStructArray() const {
/*offset=*/0);
}
+template <typename DataType>
+inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out) {
+ using CType = typename arrow::TypeTraits<DataType>::CType;
+ auto* out_values = reinterpret_cast<CType*>(out);
+
+ // Loop through all of the columns
+ for (int i = 0; i < batch.num_columns(); ++i) {
+ const auto* in_values = batch.column(i)->data()->GetValues<CType>(1);
+
+ // Copy data of each column
+ memcpy(out_values, in_values, sizeof(CType) * batch.num_rows());
+ out_values += batch.num_rows();
+ } // End loop through columns
+}
+
+Result<std::shared_ptr<Tensor>> RecordBatch::ToTensor(MemoryPool* pool) const {
+ if (num_columns() == 0) {
+ return Status::TypeError(
+ "Conversion to Tensor for RecordBatches without columns/schema is not "
+ "supported.");
+ }
+ const auto& type = column(0)->type();
+ // Check for supported data types
+ if (!is_integer(type->id()) && !is_floating(type->id())) {
+ return Status::TypeError("DataType is not supported: ", type->ToString());
+ }
+ // Check for uniform data type
+ // Check for no validity bitmap of each field
+ for (int i = 0; i < num_columns(); ++i) {
+ if (column(i)->null_count() > 0) {
+ return Status::TypeError("Can only convert a RecordBatch with no
nulls.");
+ }
+ if (column(i)->type() != type) {
Review Comment:
Rather than requiring exact type matches, could we cast the types? Would it be
worthwhile to have an option for that?
##########
cpp/src/arrow/record_batch.cc:
##########
@@ -247,6 +248,97 @@ Result<std::shared_ptr<StructArray>>
RecordBatch::ToStructArray() const {
/*offset=*/0);
}
+template <typename DataType>
+inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out) {
+ using CType = typename arrow::TypeTraits<DataType>::CType;
+ auto* out_values = reinterpret_cast<CType*>(out);
+
+ // Loop through all of the columns
+ for (int i = 0; i < batch.num_columns(); ++i) {
+ const auto* in_values = batch.column(i)->data()->GetValues<CType>(1);
Review Comment:
Should we be using a `DCHECK` to confirm that everything is the same type or
that we've cast things properly?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]