iajoiner commented on a change in pull request #9702:
URL: https://github.com/apache/arrow/pull/9702#discussion_r786781062
##########
File path: cpp/src/arrow/adapters/orc/adapter.cc
##########
@@ -628,41 +733,86 @@ class ArrowOutputStream : public liborc::OutputStream {
int64_t length_;
};
+Result<liborc::WriterOptions> MakeOrcWriterOptions(
+ arrow::adapters::orc::WriteOptions options) {
+ liborc::WriterOptions orc_options_;
+ orc_options_.setFileVersion(
+ liborc::FileVersion(static_cast<uint32_t>(options.file_version.major()),
+ static_cast<uint32_t>(options.file_version.minor())));
+ orc_options_.setStripeSize(static_cast<uint64_t>(options.stripe_size));
+ orc_options_.setCompressionBlockSize(
+ static_cast<uint64_t>(options.compression_block_size));
+ orc_options_.setCompressionStrategy(static_cast<liborc::CompressionStrategy>(
+ static_cast<int8_t>(options.compression_strategy)));
+ orc_options_.setRowIndexStride(static_cast<uint64_t>(options.row_index_stride));
+ orc_options_.setPaddingTolerance(options.padding_tolerance);
+ orc_options_.setDictionaryKeySizeThreshold(options.dictionary_key_size_threshold);
+ orc_options_.setPaddingTolerance(options.padding_tolerance);
+ std::set<uint64_t> orc_bloom_filter_columns_;
+ std::for_each(options.bloom_filter_columns.begin(), options.bloom_filter_columns.end(),
+ [&orc_bloom_filter_columns_](const int64_t col) {
+ orc_bloom_filter_columns_.insert(static_cast<uint64_t>(col));
+ });
+ orc_options_.setColumnsUseBloomFilter(orc_bloom_filter_columns_);
+ orc_options_.setBloomFilterFPP(options.bloom_filter_fpp);
+ switch (options.compression) {
+ case Compression::UNCOMPRESSED:
+ orc_options_.setCompression(liborc::CompressionKind::CompressionKind_NONE);
+ break;
+ case Compression::GZIP:
+ orc_options_.setCompression(liborc::CompressionKind::CompressionKind_ZLIB);
+ break;
+ case Compression::SNAPPY:
+ orc_options_.setCompression(liborc::CompressionKind::CompressionKind_SNAPPY);
+ break;
+ case Compression::LZ4:
+ orc_options_.setCompression(liborc::CompressionKind::CompressionKind_LZ4);
+ break;
+ case Compression::ZSTD:
+ orc_options_.setCompression(liborc::CompressionKind::CompressionKind_ZSTD);
+ break;
+ default:
+ return Status::Invalid("Compression type not supported by ORC");
+ }
+ return orc_options_;
+}
+
} // namespace
class ORCFileWriter::Impl {
public:
- Status Open(arrow::io::OutputStream* output_stream) {
+ Status Open(arrow::io::OutputStream* output_stream, const WriteOptions& write_options) {
out_stream_ = std::unique_ptr<liborc::OutputStream>(
checked_cast<liborc::OutputStream*>(new ArrowOutputStream(*output_stream)));
+ write_options_ = write_options;
return Status::OK();
}
Status Write(const Table& table) {
- std::unique_ptr<liborc::WriterOptions> orc_options =
- std::unique_ptr<liborc::WriterOptions>(new liborc::WriterOptions());
ARROW_ASSIGN_OR_RAISE(auto orc_schema, GetOrcType(*(table.schema())));
+ ARROW_ASSIGN_OR_RAISE(auto orc_options_, MakeOrcWriterOptions(write_options_));
+ auto batch_size_ = static_cast<uint64_t>(write_options_.batch_size);
Review comment:
Sure! Sorry for forgetting to change that when I decided to store the
Arrow `WriteOptions` as opposed to the ORC `WriterOptions`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]