tadeja commented on code in PR #41870:
URL: https://github.com/apache/arrow/pull/41870#discussion_r3501112646
##########
cpp/src/arrow/tensor.cc:
##########
@@ -284,75 +301,99 @@ struct ConvertColumnsToTensorRowMajorVisitor {
}
};
-template <typename DataType>
-inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out,
+template <typename DataType, typename Container>
+inline void ConvertColumnsToTensor(const Container& container, uint8_t* out,
bool row_major) {
using CType = typename arrow::TypeTraits<DataType>::CType;
auto* out_values = reinterpret_cast<CType*>(out);
- int i = 0;
- for (const auto& column : batch.columns()) {
- if (row_major) {
- ConvertColumnsToTensorRowMajorVisitor<CType> visitor{out_values, *column->data(),
- batch.num_columns(), i++};
- DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
- } else {
- ConvertColumnsToTensorVisitor<CType> visitor{out_values, *column->data()};
- DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+ for (int col_idx = 0; col_idx < container.num_columns(); ++col_idx) {
+ if constexpr (std::is_same_v<Container, Table>) {
+ int64_t chunk_idx = 0;
+
+ for (const auto& chunk : container.column(col_idx)->chunks()) {
+ if (row_major) {
+ ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+ out_values, *chunk->data(), container.num_columns(), col_idx, chunk_idx};
+ DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+ chunk_idx += chunk->length();
+ } else {
+ ConvertArrayToTensorVisitor<CType> visitor{out_values, *chunk->data()};
+ DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+ }
+ }
+ } else if constexpr (std::is_same_v<Container, RecordBatch>) {
+ const auto& array_data = container.column_data(col_idx);
+
+ if (row_major) {
+ ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+ out_values, *array_data, container.num_columns(), col_idx, 0};
+ DCHECK_OK(VisitTypeInline(*array_data->type, &visitor));
+ } else {
+ ConvertArrayToTensorVisitor<CType> visitor{out_values, *array_data};
+ DCHECK_OK(VisitTypeInline(*array_data->type, &visitor));
+ }
}
}
}
-Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major,
- MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
- if (batch.num_columns() == 0) {
+template <typename Container>
+Status ToTensorImpl(const Container& container, bool null_to_nan, bool row_major,
+ MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
+ if (container.num_columns() == 0) {
return Status::TypeError(
- "Conversion to Tensor for RecordBatches without columns/schema is not "
+ "Conversion to Tensor for Tables or RecordBatches without columns/schema is not "
"supported.");
}
// Check for no validity bitmap of each field
// if null_to_nan conversion is set to false
- for (int i = 0; i < batch.num_columns(); ++i) {
- if (batch.column(i)->null_count() > 0 && !null_to_nan) {
+ for (int i = 0; i < container.num_columns(); ++i) {
+ int64_t null_count;
Review Comment:
```suggestion
int64_t null_count = 0;
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]