patch 9.2.0432: blob to string conversion can be improved

Commit: https://github.com/vim/vim/commit/3bd25c63b4eda363683c06b0dcc58e74f563d3f2
Author: Yasuhiro Matsumoto <[email protected]>
Date:   Sat May 2 15:49:17 2026 +0000

    patch 9.2.0432: blob to string conversion can be improved
    
    Problem:  blob to string conversion can be improved
    Solution: Compute the output size up front and use a single alloc plus
              mch_memmove() (Yasuhiro Matsumoto).
    
    Replace per-byte ga_append/snprintf loops with bulk allocation and
    mch_memmove in three hot paths: blob2string() (used by string()),
    string_from_blob(), and the UTF-16/UCS path of f_blob2str(). For a
    16 MiB blob, string(blob) is ~28x faster and blob2str() is ~2x faster.
    
    Benchmark (16 MiB blob, 5 iterations, total seconds):
    
    | | Before | After | Speedup |
    |---|---:|---:|---:|
    | `string(blob)` | 6.422 | 0.225 | 28.5x |
    | `blob2str(b)` | 0.504 | 0.265 | 1.90x |
    | `blob2str(b, {encoding: 'utf-8'})` | 0.507 | 0.282 | 1.80x |
    | `blob2str(b, {encoding: 'utf-16le'})` | 0.407 | 0.202 | 2.01x |
    
    closes: #20112
    
    Signed-off-by: Yasuhiro Matsumoto <[email protected]>
    Signed-off-by: Christian Brabandt <[email protected]>

diff --git a/src/blob.c b/src/blob.c
index bbdf2873d..ad2b02bbb 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -263,33 +263,47 @@ write_blob(FILE *fd, blob_T *blob)
  * Convert a blob to a readable form: "0z00112233.44556677.8899"
  */
     char_u *
-blob2string(blob_T *blob, char_u **tofree, char_u *numbuf)
+blob2string(blob_T *blob, char_u **tofree, char_u *numbuf UNUSED)
 {
-    int                i;
-    garray_T    ga;
+    static const char hex_chars[] = "0123456789ABCDEF";
+    int                blen;
+    size_t     total;
+    char_u     *buf;
+    char_u     *p;
+    char_u     *src;
 
-    if (blob == NULL)
+    if (blob == NULL || (blen = blob_len(blob)) == 0)
+    {
+       *tofree = NULL;
+       return (char_u *)"0z";
+    }
+
+    // 2 ("0z") + 2 hex per byte + one '.' every 4 bytes + NUL terminator.
+    total = 2 + (size_t)blen * 2 + (size_t)((blen - 1) / 4) + 1;
+    buf = alloc(total);
+    if (buf == NULL)
     {
        *tofree = NULL;
        return (char_u *)"0z";
     }
 
-    // Store bytes in the growarray.
-    ga_init2(&ga, 1, 4000);
-    GA_CONCAT_LITERAL(&ga, "0z");
-    for (i = 0; i < blob_len(blob); i++)
+    p = buf;
+    *p++ = '0';
+    *p++ = 'z';
+    src = (char_u *)blob->bv_ga.ga_data;
+    for (int i = 0; i < blen; i++)
     {
-       size_t  numbuflen;
+       unsigned b;
 
        if (i > 0 && (i & 3) == 0)
-           GA_CONCAT_LITERAL(&ga, ".");
-       numbuflen = vim_snprintf_safelen((char *)numbuf, NUMBUFLEN,
-           "%02X", blob_get(blob, i));
-       ga_concat_len(&ga, numbuf, numbuflen);
-    }
-    ga_append(&ga, NUL);               // append a NUL at the end
-    *tofree = ga.ga_data;
-    return *tofree;
+           *p++ = '.';
+       b = src[i];
+       *p++ = hex_chars[b >> 4];
+       *p++ = hex_chars[b & 0xf];
+    }
+    *p = NUL;
+    *tofree = buf;
+    return buf;
 }
 
 /*
diff --git a/src/strings.c b/src/strings.c
index ec81ee469..ff63a3ef4 100644
--- a/src/strings.c
+++ b/src/strings.c
@@ -1262,43 +1262,42 @@ blob_from_string(char_u *str, blob_T *blob)
     static int
 string_from_blob(blob_T *blob, long *start_idx, string_T *ret)
 {
-    garray_T   str_ga;
-    long       blen;
-    int                idx;
-
-    ga_init2(&str_ga, sizeof(char), 80);
-
-    blen = blob_len(blob);
+    long       blen = blob_len(blob);
+    long       start = *start_idx;
+    char_u     *src;
+    char_u     *nl;
+    long       line_len;
 
-    for (idx = *start_idx; idx < blen; idx++)
+    if (start >= blen)
     {
-       char_u byte = (char_u)blob_get(blob, idx);
-       if (byte == NL)
-       {
-           idx++;
-           break;
-       }
-
-       if (byte == NUL)
-           byte = NL;
-
-       ga_append(&str_ga, byte);
+       ret->string = vim_strsave((char_u *)"");
+       ret->length = 0;
+       *start_idx = blen;
+       return (ret->string == NULL) ? FAIL : OK;
     }
 
-    if (str_ga.ga_data != NULL)
-    {
-       ret->string = vim_strnsave(str_ga.ga_data, str_ga.ga_len);
-       ret->length = str_ga.ga_len;
-    }
-    else
+    src = (char_u *)blob->bv_ga.ga_data + start;
+    nl = (char_u *)memchr(src, NL, (size_t)(blen - start));
+    line_len = (nl == NULL) ? (blen - start) : (long)(nl - src);
+
+    ret->string = alloc(line_len + 1);
+    if (ret->string == NULL)
     {
-       ret->string = vim_strsave((char_u *)"");
        ret->length = 0;
+       return FAIL;
     }
-    *start_idx = idx;
+    if (line_len > 0)
+       mch_memmove(ret->string, src, (size_t)line_len);
+    ret->string[line_len] = NUL;
+    ret->length = (size_t)line_len;
 
-    ga_clear(&str_ga);
-    return (ret->string == NULL) ? FAIL : OK;
+    // A NUL byte in the blob represents a NL in the resulting string.
+    for (long i = 0; i < line_len; i++)
+       if (ret->string[i] == NUL)
+           ret->string[i] = NL;
+
+    *start_idx = start + line_len + (nl != NULL ? 1 : 0);
+    return OK;
 }
 
 /*
@@ -1486,11 +1485,14 @@ f_blob2str(typval_T *argvars, typval_T *rettv)
        garray_T blob_ga;
        int nul_size = (from_prop & ENC_4BYTE) ? 4 : 2;
        ga_init2(&blob_ga, 1, blen + nul_size);
-       for (long i = 0; i < blen; i++)
-           ga_append(&blob_ga, (int)(unsigned char)blob_get(blob, i));
-       // Add NUL terminator (2 bytes for UTF-16/UCS-2, 4 bytes for UTF-32/UCS-4)
-       for (int i = 0; i < nul_size; i++)
-           ga_append(&blob_ga, NUL);
+       if (ga_grow(&blob_ga, blen + nul_size) == OK)
+       {
+           if (blen > 0)
+               mch_memmove(blob_ga.ga_data, blob->bv_ga.ga_data, (size_t)blen);
+           // NUL terminator (2 bytes for UTF-16/UCS-2, 4 bytes for UTF-32/UCS-4)
+           vim_memset((char_u *)blob_ga.ga_data + blen, 0, (size_t)nul_size);
+           blob_ga.ga_len = blen + nul_size;
+       }
 
        // Convert the entire blob at once
        vimconv_T vimconv;
diff --git a/src/version.c b/src/version.c
index 9208a5078..c75dd6a65 100644
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    432,
 /**/
     431,
 /**/

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion visit 
https://groups.google.com/d/msgid/vim_dev/E1wJClA-002GZ7-0i%40256bit.org.

Raspunde prin e-mail lui