paleolimbot commented on a change in pull request #12030:
URL: https://github.com/apache/arrow/pull/12030#discussion_r777668121



##########
File path: r/src/io.cpp
##########
@@ -178,4 +180,134 @@ void io___BufferOutputStream__Write(
   StopIfNotOk(stream->Write(RAW(bytes), bytes.size()));
 }
 
+// TransformInputStream::TransformFunc wrapper
+
+class RIconvWrapper {  // Wraps a conversion handle obtained from Riconv_open(); the destructor closes it.
+ public:  // NOTE(review): no copy operations are declared, so a copy would share handle_ and both destructors would call Riconv_close() -- confirm instances are never copied.
+  RIconvWrapper(std::string to, std::string from)
+      : handle_(Riconv_open(to.c_str(), from.c_str())) {  // open a handle for converting `from` -> `to`
+    if (handle_ == ((void*)-1)) {  // (void*)-1 is treated as the failed-open value; report it via cpp11::stop()
+      cpp11::stop("Can't convert encoding from '%s' to '%s'", from.c_str(), 
to.c_str());
+    }
+  }
+  // Forwards all arguments to Riconv() together with the stored handle and returns its result unchanged.
+  size_t iconv(const char** inbuf, size_t* inbytesleft, char** outbuf,
+               size_t* outbytesleft) {
+    return Riconv(handle_, inbuf, inbytesleft, outbuf, outbytesleft);
+  }
+  // Closes the handle, skipping the (void*)-1 value that the constructor treats as a failed open.
+  ~RIconvWrapper() {
+    if (handle_ != ((void*)-1)) {
+      Riconv_close(handle_);
+    }
+  }
+  // Handle returned by Riconv_open(); passed to Riconv() and Riconv_close().
+ protected:
+  void* handle_;
+};
+
+struct ReencodeUTF8TransformFunctionWrapper {
+  explicit ReencodeUTF8TransformFunctionWrapper(std::string from)
+      : from_(from), iconv_("UTF-8", from), n_pending_(0) {}
+
+  // This may get copied and we need a fresh RIconvWrapper for each copy.
+  ReencodeUTF8TransformFunctionWrapper(const 
ReencodeUTF8TransformFunctionWrapper& ref)
+      : ReencodeUTF8TransformFunctionWrapper(ref.from_) {}
+
+  arrow::Result<std::shared_ptr<arrow::Buffer>> operator()(
+      const std::shared_ptr<arrow::Buffer>& src) {
+    ARROW_ASSIGN_OR_RAISE(auto dest, arrow::AllocateResizableBuffer(32));
+
+    size_t out_bytes_left = dest->size();
+    char* out_buf = (char*)dest->data();
+    size_t out_bytes_used = 0;

Review comment:
       I moved the use of the types specific to `iconv()` into the 
`RIconvWrapper`, which doesn't minimize the conversions between signed and 
unsigned but does keep the iconv-specific stuff contained.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to