paleolimbot commented on a change in pull request #12030: URL: https://github.com/apache/arrow/pull/12030#discussion_r777668121
########## File path: r/src/io.cpp ##########
@@ -178,4 +180,134 @@ void io___BufferOutputStream__Write(
   StopIfNotOk(stream->Write(RAW(bytes), bytes.size()));
 }
+// TransformInputStream::TransformFunc wrapper
+
+class RIconvWrapper {
+ public:
+  RIconvWrapper(std::string to, std::string from)
+      : handle_(Riconv_open(to.c_str(), from.c_str())) {
+    if (handle_ == ((void*)-1)) {
+      cpp11::stop("Can't convert encoding from '%s' to '%s'", from.c_str(), to.c_str());
+    }
+  }
+
+  size_t iconv(const char** inbuf, size_t* inbytesleft, char** outbuf,
+               size_t* outbytesleft) {
+    return Riconv(handle_, inbuf, inbytesleft, outbuf, outbytesleft);
+  }
+
+  ~RIconvWrapper() {
+    if (handle_ != ((void*)-1)) {
+      Riconv_close(handle_);
+    }
+  }
+
+ protected:
+  void* handle_;
+};
+
+struct ReencodeUTF8TransformFunctionWrapper {
+  explicit ReencodeUTF8TransformFunctionWrapper(std::string from)
+      : from_(from), iconv_("UTF-8", from), n_pending_(0) {}
+
+  // This may get copied and we need a fresh RIconvWrapper for each copy.
+  ReencodeUTF8TransformFunctionWrapper(const ReencodeUTF8TransformFunctionWrapper& ref)
+      : ReencodeUTF8TransformFunctionWrapper(ref.from_) {}
+
+  arrow::Result<std::shared_ptr<arrow::Buffer>> operator()(
+      const std::shared_ptr<arrow::Buffer>& src) {
+    ARROW_ASSIGN_OR_RAISE(auto dest, arrow::AllocateResizableBuffer(32));
+
+    size_t out_bytes_left = dest->size();
+    char* out_buf = (char*)dest->data();
+    size_t out_bytes_used = 0;

Review comment:
I moved the use of the types specific to `iconv()` into the `RIconvWrapper`, which doesn't minimize the conversions between signed and unsigned but does keep the iconv-specific stuff contained.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org