patch 9.2.0432: blob to string conversion can be improved
Commit:
https://github.com/vim/vim/commit/3bd25c63b4eda363683c06b0dcc58e74f563d3f2
Author: Yasuhiro Matsumoto <[email protected]>
Date: Sat May 2 15:49:17 2026 +0000
patch 9.2.0432: blob to string conversion can be improved
Problem: blob to string conversion can be improved
Solution: Compute the output size up front and use a single alloc plus
mch_memmove() (Yasuhiro Matsumoto).
Replace per-byte ga_append/snprintf loops with bulk allocation and
mch_memmove in three hot paths: blob2string() (used by string()),
string_from_blob(), and the UTF-16/UTF-32 (UCS-2/UCS-4) path of f_blob2str(). For a
16 MiB blob, string(blob) is ~28x faster and blob2str() is ~2x faster.
Benchmark (16 MiB blob, 5 iterations, total seconds):
| | Before | After | Speedup |
|---|---:|---:|---:|
| `string(blob)` | 6.422 | 0.225 | 28.5x |
| `blob2str(b)` | 0.504 | 0.265 | 1.90x |
| `blob2str(b, {encoding: 'utf-8'})` | 0.507 | 0.282 | 1.80x |
| `blob2str(b, {encoding: 'utf-16le'})` | 0.407 | 0.202 | 2.01x |
closes: #20112
Signed-off-by: Yasuhiro Matsumoto <[email protected]>
Signed-off-by: Christian Brabandt <[email protected]>
diff --git a/src/blob.c b/src/blob.c
index bbdf2873d..ad2b02bbb 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -263,33 +263,47 @@ write_blob(FILE *fd, blob_T *blob)
* Convert a blob to a readable form: "0z00112233.44556677.8899"
*/
char_u *
-blob2string(blob_T *blob, char_u **tofree, char_u *numbuf)
+blob2string(blob_T *blob, char_u **tofree, char_u *numbuf UNUSED)
{
- int i;
- garray_T ga;
+ static const char hex_chars[] = "0123456789ABCDEF";
+ int blen;
+ size_t total;
+ char_u *buf;
+ char_u *p;
+ char_u *src;
- if (blob == NULL)
+ if (blob == NULL || (blen = blob_len(blob)) == 0)
+ {
+ *tofree = NULL;
+ return (char_u *)"0z";
+ }
+
+ // 2 ("0z") + 2 hex per byte + one '.' every 4 bytes + NUL terminator.
+ total = 2 + (size_t)blen * 2 + (size_t)((blen - 1) / 4) + 1;
+ buf = alloc(total);
+ if (buf == NULL)
{
*tofree = NULL;
return (char_u *)"0z";
}
- // Store bytes in the growarray.
- ga_init2(&ga, 1, 4000);
- GA_CONCAT_LITERAL(&ga, "0z");
- for (i = 0; i < blob_len(blob); i++)
+ p = buf;
+ *p++ = '0';
+ *p++ = 'z';
+ src = (char_u *)blob->bv_ga.ga_data;
+ for (int i = 0; i < blen; i++)
{
- size_t numbuflen;
+ unsigned b;
if (i > 0 && (i & 3) == 0)
- GA_CONCAT_LITERAL(&ga, ".");
- numbuflen = vim_snprintf_safelen((char *)numbuf, NUMBUFLEN,
- "%02X", blob_get(blob, i));
- ga_concat_len(&ga, numbuf, numbuflen);
- }
- ga_append(&ga, NUL); // append a NUL at the end
- *tofree = ga.ga_data;
- return *tofree;
+ *p++ = '.';
+ b = src[i];
+ *p++ = hex_chars[b >> 4];
+ *p++ = hex_chars[b & 0xf];
+ }
+ *p = NUL;
+ *tofree = buf;
+ return buf;
}
/*
diff --git a/src/strings.c b/src/strings.c
index ec81ee469..ff63a3ef4 100644
--- a/src/strings.c
+++ b/src/strings.c
@@ -1262,43 +1262,42 @@ blob_from_string(char_u *str, blob_T *blob)
static int
string_from_blob(blob_T *blob, long *start_idx, string_T *ret)
{
- garray_T str_ga;
- long blen;
- int idx;
-
- ga_init2(&str_ga, sizeof(char), 80);
-
- blen = blob_len(blob);
+ long blen = blob_len(blob);
+ long start = *start_idx;
+ char_u *src;
+ char_u *nl;
+ long line_len;
- for (idx = *start_idx; idx < blen; idx++)
+ if (start >= blen)
{
- char_u byte = (char_u)blob_get(blob, idx);
- if (byte == NL)
- {
- idx++;
- break;
- }
-
- if (byte == NUL)
- byte = NL;
-
- ga_append(&str_ga, byte);
+ ret->string = vim_strsave((char_u *)"");
+ ret->length = 0;
+ *start_idx = blen;
+ return (ret->string == NULL) ? FAIL : OK;
}
- if (str_ga.ga_data != NULL)
- {
- ret->string = vim_strnsave(str_ga.ga_data, str_ga.ga_len);
- ret->length = str_ga.ga_len;
- }
- else
+ src = (char_u *)blob->bv_ga.ga_data + start;
+ nl = (char_u *)memchr(src, NL, (size_t)(blen - start));
+ line_len = (nl == NULL) ? (blen - start) : (long)(nl - src);
+
+ ret->string = alloc(line_len + 1);
+ if (ret->string == NULL)
{
- ret->string = vim_strsave((char_u *)"");
ret->length = 0;
+ return FAIL;
}
- *start_idx = idx;
+ if (line_len > 0)
+ mch_memmove(ret->string, src, (size_t)line_len);
+ ret->string[line_len] = NUL;
+ ret->length = (size_t)line_len;
- ga_clear(&str_ga);
- return (ret->string == NULL) ? FAIL : OK;
+ // A NUL byte in the blob represents a NL in the resulting string.
+ for (long i = 0; i < line_len; i++)
+ if (ret->string[i] == NUL)
+ ret->string[i] = NL;
+
+ *start_idx = start + line_len + (nl != NULL ? 1 : 0);
+ return OK;
}
/*
@@ -1486,11 +1485,14 @@ f_blob2str(typval_T *argvars, typval_T *rettv)
garray_T blob_ga;
int nul_size = (from_prop & ENC_4BYTE) ? 4 : 2;
ga_init2(&blob_ga, 1, blen + nul_size);
- for (long i = 0; i < blen; i++)
- ga_append(&blob_ga, (int)(unsigned char)blob_get(blob, i));
- // Add NUL terminator (2 bytes for UTF-16/UCS-2, 4 bytes for UTF-32/UCS-4)
- for (int i = 0; i < nul_size; i++)
- ga_append(&blob_ga, NUL);
+ if (ga_grow(&blob_ga, blen + nul_size) == OK)
+ {
+ if (blen > 0)
+ mch_memmove(blob_ga.ga_data, blob->bv_ga.ga_data, (size_t)blen);
+ // NUL terminator (2 bytes for UTF-16/UCS-2, 4 bytes for UTF-32/UCS-4)
+ vim_memset((char_u *)blob_ga.ga_data + blen, 0, (size_t)nul_size);
+ blob_ga.ga_len = blen + nul_size;
+ }
// Convert the entire blob at once
vimconv_T vimconv;
diff --git a/src/version.c b/src/version.c
index 9208a5078..c75dd6a65 100644
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
+/**/
+ 432,
/**/
431,
/**/
--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
---
You received this message because you are subscribed to the Google Groups
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion visit
https://groups.google.com/d/msgid/vim_dev/E1wJClA-002GZ7-0i%40256bit.org.