patch 9.1.1016: Not possible to convert string2blob and blob2string Commit: https://github.com/vim/vim/commit/1aefe1de0b20fe4966863e07efa14b6aa87323ee Author: Yegappan Lakshmanan <yegap...@yahoo.com> Date: Tue Jan 14 17:29:42 2025 +0100
patch 9.1.1016: Not possible to convert string2blob and blob2string Problem: Not possible to convert string2blob and blob2string Solution: add support for the blob2str() and str2blob() functions closes: #16373 Signed-off-by: Yegappan Lakshmanan <yegap...@yahoo.com> Signed-off-by: Christian Brabandt <c...@256bit.org> diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index a34c63aeb..10970bc51 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -1,4 +1,4 @@ -*builtin.txt* For Vim version 9.1. Last change: 2025 Jan 06 +*builtin.txt* For Vim version 9.1. Last change: 2025 Jan 14 VIM REFERENCE MANUAL by Bram Moolenaar @@ -72,6 +72,7 @@ base64_encode({blob}) String base64 encode the bytes in {blob} bindtextdomain({package}, {path}) Bool bind text domain to specified path blob2list({blob}) List convert {blob} into a list of numbers +blob2str({blob} [, {options}]) String convert {blob} into a String browse({save}, {title}, {initdir}, {default}) String put up a file requester browsedir({title}, {initdir}) String put up a directory requester @@ -609,6 +610,8 @@ split({expr} [, {pat} [, {keepempty}]]) sqrt({expr}) Float square root of {expr} srand([{expr}]) List get seed for |rand()| state([{what}]) String current state of Vim +str2blob({string} [, {options}]) + Blob convert {string} into a Blob str2float({expr} [, {quoted}]) Float convert String to Float str2list({expr} [, {utf8}]) List convert each character of {expr} to ASCII/UTF-8 value @@ -1289,6 +1292,38 @@ blob2list({blob}) *blob2list()* < Return type: list<any> or list<number> + +blob2str({blob} [, {options}]) *blob2str()* + Return a String in the current 'encoding' by converting the + bytes in {blob} into characters. + + If {options} is not supplied, the current 'encoding' value is + used to decode the bytes in {blob}. + + The argument {options} is a |Dict| and supports the following + items: + encoding Decode the bytes in {blob} using this + encoding. The value is a |String|. 
See + |encoding-names| for the supported values. + *E1515* + An error is given and an empty string is returned if + an invalid byte sequence is encountered in {blob}. + + Returns an empty String if {blob} is empty. + + See also |str2blob()| + + Examples: > + blob2str(0z6162) returns "ab" + blob2str(0zC2ABC2BB) returns "«»" + blob2str(0zABBB, {'encoding': 'latin1'}) returns "«»" +< + Can also be used as a |method|: > + GetBlob()->blob2str() +< + Return type: |String| + + *browse()* browse({save}, {title}, {initdir}, {default}) Put up a file requester. This only works when "has("browse")" @@ -10556,6 +10591,36 @@ state([{what}]) *state()* Return type: |String| +str2blob({string} [, {options}]) *str2blob()* + Return a Blob by converting the characters in {string} into + bytes. + + If {options} is not supplied, the current 'encoding' value is + used to convert the characters in {string} into bytes. + + The argument {options} is a |Dict| and supports the following + items: + encoding Encode the characters in {string} using this + encoding. The value is a |String|. See + |encoding-names| for the supported values. + + An error is given and an empty blob is returned if the + character encoding fails. + + Returns an empty Blob if {string} is empty. + + See also |blob2str()| + + Examples: > + str2blob("ab") returns 0z6162 + str2blob("«»") returns 0zC2ABC2BB + str2blob("«»", {'encoding': 'latin1'}) returns 0zABBB +< + Can also be used as a |method|: > + GetStr()->str2blob() +< + Return type: |Blob| + str2float({string} [, {quoted}]) *str2float()* Convert String {string} to a Float. 
This mostly works the same as when using a floating point number in an expression, diff --git a/runtime/doc/tags b/runtime/doc/tags index 05c212614..fbc8545ec 100644 --- a/runtime/doc/tags +++ b/runtime/doc/tags @@ -4585,6 +4585,7 @@ E1511 options.txt /*E1511* E1512 options.txt /*E1512* E1513 message.txt /*E1513* E1514 options.txt /*E1514* +E1515 builtin.txt /*E1515* E152 helphelp.txt /*E152* E153 helphelp.txt /*E153* E154 helphelp.txt /*E154* @@ -6192,6 +6193,7 @@ blob-index eval.txt /*blob-index* blob-literal eval.txt /*blob-literal* blob-modification eval.txt /*blob-modification* blob2list() builtin.txt /*blob2list()* +blob2str() builtin.txt /*blob2str()* blockwise-examples visual.txt /*blockwise-examples* blockwise-operators visual.txt /*blockwise-operators* blockwise-put change.txt /*blockwise-put* @@ -10253,6 +10255,7 @@ status-line windows.txt /*status-line* statusmsg-variable eval.txt /*statusmsg-variable* stl-%! options.txt /*stl-%!* stl-%{ options.txt /*stl-%{* +str2blob() builtin.txt /*str2blob()* str2float() builtin.txt /*str2float()* str2list() builtin.txt /*str2list()* str2nr() builtin.txt /*str2nr()* diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt index ded30e35d..cb4225734 100644 --- a/runtime/doc/usr_41.txt +++ b/runtime/doc/usr_41.txt @@ -1,4 +1,4 @@ -*usr_41.txt* For Vim version 9.1. Last change: 2025 Jan 02 +*usr_41.txt* For Vim version 9.1. 
Last change: 2025 Jan 14 VIM USER MANUAL - by Bram Moolenaar @@ -801,6 +801,8 @@ String manipulation: *string-functions* trim() trim characters from a string bindtextdomain() set message lookup translation base path gettext() lookup message translation + str2blob() convert a string into a blob + blob2str() convert a blob into a string List manipulation: *list-functions* get() get an item without error for wrong index diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt index e212230e5..cae287684 100644 --- a/runtime/doc/version9.txt +++ b/runtime/doc/version9.txt @@ -1,4 +1,4 @@ -*version9.txt* For Vim version 9.1. Last change: 2025 Jan 12 +*version9.txt* For Vim version 9.1. Last change: 2025 Jan 14 VIM REFERENCE MANUAL by Bram Moolenaar @@ -41638,6 +41638,7 @@ Functions: ~ |base64_decode()| decode a base64 string into a blob |base64_encode()| encode a blob into a base64 string +|blob2str()| convert a blob into a string |bindtextdomain()| set message lookup translation base path |diff()| diff two Lists of strings |filecopy()| copy a file {from} to {to} @@ -41653,6 +41654,7 @@ Functions: ~ |matchbufline()| all the matches of a pattern in a buffer |matchstrlist()| all the matches of a pattern in a List of strings |popup_setbuf()| switch to a different buffer in a popup +|str2blob()| convert a string into a blob Autocommands: ~ diff --git a/src/errors.h b/src/errors.h index 2811a3275..6bab82650 100644 --- a/src/errors.h +++ b/src/errors.h @@ -3657,3 +3657,5 @@ EXTERN char e_winfixbuf_cannot_go_to_buffer[] INIT(= N_("E1513: Cannot switch buffer. 
'winfixbuf' is enabled")); EXTERN char e_invalid_return_type_from_findfunc[] INIT(= N_("E1514: 'findfunc' did not return a List type")); +EXTERN char e_str_encoding_failed[] + INIT(= N_("E1515: Unable to convert %s '%s' encoding")); diff --git a/src/evalfunc.c b/src/evalfunc.c index ef30792ad..63142523c 100644 --- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -1151,6 +1151,7 @@ static argcheck_T arg2_string_number[] = {arg_string, arg_number}; static argcheck_T arg2_string_or_list_dict[] = {arg_string_or_list_any, arg_dict_any}; static argcheck_T arg2_string_or_list_number[] = {arg_string_or_list_any, arg_number}; static argcheck_T arg2_string_string_or_number[] = {arg_string, arg_string_or_nr}; +static argcheck_T arg2_blob_dict[] = {arg_blob, arg_dict_any}; static argcheck_T arg3_any_list_dict[] = {arg_any, arg_list_any, arg_dict_any}; static argcheck_T arg3_buffer_lnum_lnum[] = {arg_buffer, arg_lnum, arg_lnum}; static argcheck_T arg3_buffer_number_number[] = {arg_buffer, arg_number, arg_number}; @@ -1844,6 +1845,8 @@ static funcentry_T global_functions[] = ret_bool, f_bindtextdomain}, {"blob2list", 1, 1, FEARG_1, arg1_blob, ret_list_number, f_blob2list}, + {"blob2str", 1, 2, FEARG_1, arg2_blob_dict, + ret_string, f_blob2str}, {"browse", 4, 4, 0, arg4_browse, ret_string, f_browse}, {"browsedir", 2, 2, 0, arg2_string, @@ -2710,6 +2713,8 @@ static funcentry_T global_functions[] = ret_list_number, f_srand}, {"state", 0, 1, FEARG_1, arg1_string, ret_string, f_state}, + {"str2blob", 1, 2, FEARG_1, arg2_string_dict, + ret_blob, f_str2blob}, {"str2float", 1, 2, FEARG_1, arg2_string_bool, ret_float, f_str2float}, {"str2list", 1, 2, FEARG_1, arg2_string_bool, diff --git a/src/mbyte.c b/src/mbyte.c index 4a7eada25..157018266 100644 --- a/src/mbyte.c +++ b/src/mbyte.c @@ -2106,6 +2106,17 @@ utf_byte2len(int b) return utf8len_tab[b]; } +/* + * Return length of UTF-8 character, obtained from the first byte. + * "b" must be between 0 and 255! 
+ * Returns 0 for an invalid first byte value. + */ + int +utf_byte2len_zero(int b) +{ + return utf8len_tab_zero[b]; +} + /* * Get the length of UTF-8 byte sequence "p[size]". Does not include any * following composing characters. diff --git a/src/proto/mbyte.pro b/src/proto/mbyte.pro index bb976e3bf..7061bab84 100644 --- a/src/proto/mbyte.pro +++ b/src/proto/mbyte.pro @@ -31,6 +31,7 @@ int utfc_ptr2char_len(char_u *p, int *pcc, int maxlen); int utfc_char2bytes(int off, char_u *buf); int utf_ptr2len(char_u *p); int utf_byte2len(int b); +int utf_byte2len_zero(int b); int utf_ptr2len_len(char_u *p, int size); int utfc_ptr2len(char_u *p); int utfc_ptr2len_len(char_u *p, int size); diff --git a/src/proto/strings.pro b/src/proto/strings.pro index c25555f1c..ddc53cce0 100644 --- a/src/proto/strings.pro +++ b/src/proto/strings.pro @@ -31,6 +31,8 @@ void string_reduce(typval_T *argvars, typval_T *expr, typval_T *rettv); void f_byteidx(typval_T *argvars, typval_T *rettv); void f_byteidxcomp(typval_T *argvars, typval_T *rettv); void f_charidx(typval_T *argvars, typval_T *rettv); +void f_blob2str(typval_T *argvars, typval_T *rettv); +void f_str2blob(typval_T *argvars, typval_T *rettv); void f_str2list(typval_T *argvars, typval_T *rettv); void f_str2nr(typval_T *argvars, typval_T *rettv); void f_strgetchar(typval_T *argvars, typval_T *rettv); diff --git a/src/strings.c b/src/strings.c index 54ac17873..c26914d0d 100644 --- a/src/strings.c +++ b/src/strings.c @@ -1213,6 +1213,146 @@ f_charidx(typval_T *argvars, typval_T *rettv) rettv->vval.v_number = len > 0 ? len - 1 : 0; } +/* + * Convert the string "str", from encoding "from" to encoding "to". 
+ */ + static char_u * +convert_string(char_u *str, char_u *from, char_u *to) +{ + vimconv_T vimconv; + + vimconv.vc_type = CONV_NONE; + if (convert_setup(&vimconv, from, to) == FAIL) + return NULL; + vimconv.vc_fail = TRUE; + if (vimconv.vc_type == CONV_NONE) + str = vim_strsave(str); + else + str = string_convert(&vimconv, str, NULL); + convert_setup(&vimconv, NULL, NULL); + + return str; +} + +/* + * "blob2str()" function + * Converts a blob to a string, ensuring valid UTF-8 encoding. + */ + void +f_blob2str(typval_T *argvars, typval_T *rettv) +{ + blob_T *blob; + char_u *str; + char_u *p; + int blen; + + if (check_for_blob_arg(argvars, 0) == FAIL + || check_for_opt_dict_arg(argvars, 1) == FAIL) + return; + + blob = argvars->vval.v_blob; + blen = blob_len(blob); + + rettv->v_type = VAR_STRING; + + str = alloc(blen + 1); + if (str == NULL) + return; + + for (int i = 0; i < blen; i++) + str[i] = (char_u)blob_get(blob, i); + str[blen] = NUL; + + p = str; + if (argvars[1].v_type != VAR_UNKNOWN) + { + dict_T *d = argvars[1].vval.v_dict; + if (d != NULL) + { + char_u *enc = dict_get_string(d, "encoding", FALSE); + if (enc != NULL) + { + char_u *from = enc_canonize(enc_skip(enc)); + p = convert_string(str, from, p_enc); + vim_free(str); + if (p == NULL) + { + semsg(_(e_str_encoding_failed), "from", from); + vim_free(from); + return; + } + vim_free(from); + } + } + } + + if (STRCMP(p_enc, "utf-8") == 0 || STRCMP(p_enc, "utf8") == 0) + { + if (!utf_valid_string(p, NULL)) + { + semsg(_(e_str_encoding_failed), "from", p_enc); + vim_free(p); + return; + } + } + + rettv->vval.v_string = p; +} + +/* + * "str2blob()" function + */ + void +f_str2blob(typval_T *argvars, typval_T *rettv) +{ + blob_T *blob; + char_u *p; + size_t len; + + if (check_for_string_arg(argvars, 0) == FAIL + || check_for_opt_dict_arg(argvars, 1) == FAIL) + return; + + if (rettv_blob_alloc(rettv) == FAIL) + return; + + blob = rettv->vval.v_blob; + + p = tv_get_string_chk(&argvars[0]); + if (p == NULL) + 
return; + + int free_str = FALSE; + if (argvars[1].v_type != VAR_UNKNOWN) + { + dict_T *d = argvars[1].vval.v_dict; + if (d != NULL) + { + char_u *enc = dict_get_string(d, "encoding", FALSE); + if (enc != NULL) + { + char_u *to = enc_canonize(enc_skip(enc)); + p = convert_string(p, p_enc, to); + if (p == NULL) + { + semsg(_(e_str_encoding_failed), "to", to); + vim_free(to); + return; + } + vim_free(to); + free_str = TRUE; + } + } + } + + len = STRLEN(p); + for (size_t i = 0; i < len; i++) + ga_append(&blob->bv_ga, (int)p[i]); + + if (free_str) + vim_free(p); +} + /* * "str2list()" function */ diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim index f80754f0a..8b0af9101 100644 --- a/src/testdir/test_functions.vim +++ b/src/testdir/test_functions.vim @@ -4257,4 +4257,73 @@ func Test_base64_encoding() call v9.CheckLegacyAndVim9Success(lines) endfunc +" Tests for the str2blob() function +func Test_str2blob() + let lines =<< trim END + call assert_equal(0z, str2blob("")) + call assert_fails("call str2blob([])", 'E1174: String required for argument 1') + call assert_equal(0z6162, str2blob("ab")) + call assert_equal(0zC2ABC2BB, str2blob("«»")) + call assert_equal(0zC59DC59F, str2blob("ŝş")) + call assert_equal(0zE0AE85E0.AE87, str2blob("அஇ")) + call assert_equal(0zF09F81B0.F09F81B3, str2blob("🁰🁳")) + call assert_equal(0z616263, str2blob('abc', {})) + call assert_equal(0zABBB, str2blob('«»', {'encoding': 'latin1'})) + call assert_equal(0zC2ABC2BB, str2blob('«»', {'encoding': 'utf8'})) + + call assert_fails("call str2blob('abc', [])", 'E1206: Dictionary required for argument 2') + call assert_fails("call str2blob('abc', {'encoding': []})", 'E730: Using a List as a String') + call assert_fails("call str2blob('abc', {'encoding': 'ab12xy'})", 'E1515: Unable to convert to ''ab12xy'' encoding') + call assert_fails("call str2blob('ŝş', {'encoding': 'latin1'})", 'E1515: Unable to convert to ''latin1'' encoding') + call assert_fails("call str2blob('அஇ', 
{'encoding': 'latin1'})", 'E1515: Unable to convert to ''latin1'' encoding') + call assert_fails("call str2blob('🁰🁳', {'encoding': 'latin1'})", 'E1515: Unable to convert to ''latin1'' encoding') + END + call v9.CheckLegacyAndVim9Success(lines) +endfunc + +" Tests for the blob2str() function +func Test_blob2str() + let lines =<< trim END + call assert_equal("", blob2str(0z)) + call assert_fails("call blob2str([])", 'E1238: Blob required for argument 1') + call assert_equal("ab", blob2str(0z6162)) + call assert_equal("«»", blob2str(0zC2ABC2BB)) + call assert_equal("ŝş", blob2str(0zC59DC59F)) + call assert_equal("அஇ", blob2str(0zE0AE85E0.AE87)) + call assert_equal("🁰🁳", blob2str(0zF09F81B0.F09F81B3)) + call assert_equal('«»', blob2str(0zABBB, {'encoding': 'latin1'})) + call assert_equal('«»', blob2str(0zC2ABC2BB, {'encoding': 'utf8'})) + + #" Invalid encoding + call assert_fails("call blob2str(0z80)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0zC0)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0zE0)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0zF0)", "E1515: Unable to convert from 'utf-8' encoding") + + call assert_fails("call blob2str(0z6180)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0z61C0)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0z61E0)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0z61F0)", "E1515: Unable to convert from 'utf-8' encoding") + + call assert_fails("call blob2str(0zC0C0)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0z61C0C0)", "E1515: Unable to convert from 'utf-8' encoding") + + call assert_fails("call blob2str(0zE0)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0zE080)", "E1515: Unable to convert from 'utf-8' 
encoding") + call assert_fails("call blob2str(0zE080C0)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0z61E080C0)", "E1515: Unable to convert from 'utf-8' encoding") + + call assert_fails("call blob2str(0zF08080C0)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0z61F08080C0)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0zF0)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0zF080)", "E1515: Unable to convert from 'utf-8' encoding") + call assert_fails("call blob2str(0zF08080)", "E1515: Unable to convert from 'utf-8' encoding") + + call assert_fails("call blob2str(0z6162, [])", 'E1206: Dictionary required for argument 2') + call assert_fails("call blob2str(0z6162, {'encoding': []})", 'E730: Using a List as a String') + call assert_fails("call blob2str(0z6162, {'encoding': 'ab12xy'})", 'E1515: Unable to convert from ''ab12xy'' encoding') + END + call v9.CheckLegacyAndVim9Success(lines) +endfunc + " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/version.c b/src/version.c index 96a51da80..13ffc5d4e 100644 --- a/src/version.c +++ b/src/version.c @@ -704,6 +704,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1016, /**/ 1015, /**/ -- -- You received this message from the "vim_dev" maillist. Do not top-post! Type your reply below the text you are replying to. For more information, visit http://www.vim.org/maillist.php --- You received this message because you are subscribed to the Google Groups "vim_dev" group. To unsubscribe from this group and stop receiving emails from it, send an email to vim_dev+unsubscr...@googlegroups.com. To view this discussion visit https://groups.google.com/d/msgid/vim_dev/E1tXk2K-00BsBQ-B2%40256bit.org.