pitrou commented on code in PR #46815:
URL: https://github.com/apache/arrow/pull/46815#discussion_r2153850325


##########
cpp/src/arrow/compute/kernels/scalar_string_utf8.cc:
##########
@@ -987,13 +987,70 @@ const FunctionDoc utf8_rpad_doc(
      "the given UTF8 codeunit.\nNull values emit null."),
     {"strings"}, "PadOptions", /*options_required=*/true);
 
+struct Utf8ZFillTransform : public StringTransformBase {
+  using State = OptionsWrapper<PadOptions>;
+
+  const PadOptions& options_;
+
+  explicit Utf8ZFillTransform(const PadOptions& options) : options_(options) {}
+
+  Status PreExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) override {
+    auto str = reinterpret_cast<const uint8_t*>(options_.padding.data());
+    auto strlen = options_.padding.size();
+    if (util::UTF8Length(str, str + strlen) != 1) {
+      return Status::Invalid("Padding must be one codepoint, got '", options_.padding,
+                             "'");
+    }
+    return Status::OK();
+  }
+
+  int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+    return input_ncodeunits + 4 * ninputs * options_.width;
+  }
+
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
+    const int64_t input_width = util::UTF8Length(input, input + input_string_ncodeunits);
+    if (input_width >= options_.width) {
+      std::copy(input, input + input_string_ncodeunits, output);
+      return input_string_ncodeunits;
+    }
+    const int64_t spaces = options_.width - input_width;
+    uint8_t* start = output;
+    // sign-aware padding:
+    if (input_string_ncodeunits > 0 && (input[0] == '+' || input[0] == '-')) {
+      *output++ = input[0];
+      input++;
+      input_string_ncodeunits--;
+    }
+    int64_t num_zeros = spaces;
+    while (num_zeros > 0) {
+      output = std::copy(options_.padding.begin(), options_.padding.end(), output);
+      num_zeros--;
+    }
+    output = std::copy(input, input + input_string_ncodeunits, output);
+    return output - start;
+  }
+};
+
+template <typename Type>
+using Utf8ZFill = StringTransformExecWithState<Type, Utf8ZFillTransform>;
+
+const FunctionDoc utf8_zfill_doc(
+    "Left-pad strings with zeros, handling signs like Python str.zfill()",

Review Comment:
   Can we avoid mentioning Python's `str.zfill`? This is going to be used by 
people who don't know Python.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to