There is probably a bunch of stuff that *could* be optimized or done better,
but one thing I saw just now is that, instead of two nested loops and a goto,
the outer loop and the labels could be removed, and the `goto
remove_from_start_continue_2;` could be replaced with `i = -1;`, e.g.:

    // Strip leading trim sequences with a single loop: try the candidates in
    // order and, after every match, restart the scan from the first candidate.
    size_t local_strlen = *strlen;
    char *local_str = *str;
    // NOTE(review): a zero-length entry always matches (memcmp over 0 bytes
    // compares equal), so the scan would restart forever — confirm that
    // callers never pass one.
    for (size_t i = 0; i < trim_lengths_num; ++i)
    {
        // Candidate i matches when it fits in the remaining bytes and is a
        // byte-wise prefix of them.
        if (local_strlen >= trim_lengths[i] && memcmp(local_str,
trim_chars[i], trim_lengths[i]) == 0)
        {
            local_strlen -= trim_lengths[i];
            local_str += trim_lengths[i]; // step past the matched prefix
            // i is a size_t, so -1 converts to the largest size_t value; the
            // loop's ++i then wraps it back to 0, restarting the scan without
            // a goto.
            i = -1;
        }
    }

The two nested loops are reduced to a single loop, and the goto is removed.

On Sun, 1 Oct 2023 at 10:43, Hans Henrik Bergan <divinit...@gmail.com> wrote:
>
> > If have any idea, feel free to comment to me.
>
> i think the C code would look something like
>
>
> void mb_trim(size_t *strlen, char **str, const size_t
> trim_lengths_num, const size_t *trim_lengths, const char **trim_chars)
> { // Strips leading, then trailing, occurrences of the trim_chars byte sequences from *str.
>     size_t local_strlen = *strlen; // number of bytes still kept
>     char *local_str = *str; // start of the kept region inside the original buffer
>     for (;;) // pass 1: strip from the front until a full scan finds no match
>     {
>         for (size_t i = 0; i < trim_lengths_num; ++i) // NOTE(review): a zero-length entry always matches, so this would never terminate — confirm none is passed
>         {
>             if (local_strlen >= trim_lengths[i] && memcmp(local_str,
> trim_chars[i], trim_lengths[i]) == 0)
>             {
>                 local_strlen -= trim_lengths[i];
>                 local_str += trim_lengths[i]; // step past the matched prefix
>                 goto remove_from_start_continue_2; // rescan every sequence from index 0
>             }
>         }
>         break; // no sequence matched: the front is fully trimmed
>     remove_from_start_continue_2:;
>     }
>     for (;;) // pass 2: strip from the back in the same way
>     {
>         for (size_t i = 0; i < trim_lengths_num; ++i)
>         {
>             if (local_strlen >= trim_lengths[i] && memcmp(((local_str
> + local_strlen) - trim_lengths[i]), trim_chars[i], trim_lengths[i]) ==
> 0) // compares against the last trim_lengths[i] kept bytes
>             {
>                 local_strlen -= trim_lengths[i]; // shorten only; the start pointer stays put
>                 goto remove_from_end_continue_2; // rescan every sequence from index 0
>             }
>         }
>         break; // no sequence matched: the back is fully trimmed
>     remove_from_end_continue_2:;
>     }
>     memmove(*str, local_str, local_strlen); // slide the kept bytes to the start of the buffer
>     char *newstr = (char *)realloc(*str, local_strlen); // shrink to exactly the kept length; no extra byte is reserved for a terminator
>     if (newstr != nullptr) // NOTE(review): local_strlen can be 0 when everything is trimmed; realloc with size 0 may return a null pointer — confirm handling
>     {
>         *strlen = local_strlen; // the outputs are updated only when realloc succeeds
>         *str = newstr;
>     }
>     else
>     {
>         // some error handling (NOTE(review): memmove has already shifted the buffer contents, but *strlen still holds the old length here)
>     }
> }
>
> with my simple testcode looking like
>
> int main()
> { // Test driver: prints a sample string and its byte length before and after mb_trim.
>     const char *trim_chars[] = { // byte sequences to strip from both ends
>         " ",
>         "!",
>         // utf8 whitespace:
>         "\xE2\x80\x80", // EN QUAD
>         "\xE2\x80\x81", // EM QUAD
>         "\xE2\x80\x82", // EN SPACE
>         "\xE2\x80\x83", // EM SPACE
>         "\xE2\x80\x84", // THREE-PER-EM SPACE
>         "\xE2\x80\x85", // FOUR-PER-EM SPACE
>         "\xE2\x80\x86", // SIX-PER-EM SPACE
>     };
>     size_t trim_lengths[] = { // byte length of each trim_chars entry, in the same order
>         strlen(trim_chars[0]),
>         strlen(trim_chars[1]),
>         strlen(trim_chars[2]),
>         strlen(trim_chars[3]),
>         strlen(trim_chars[4]),
>         strlen(trim_chars[5]),
>         strlen(trim_chars[6]),
>         strlen(trim_chars[7]),
>         strlen(trim_chars[8]),
>     };
>     size_t trim_lengths_num = sizeof(trim_lengths) / sizeof(trim_lengths[0]); // element count of trim_lengths
>     char *teststr = strdup("  !  \xE2\x80\x80\xE2\x80\x81\xE2\x80\x82
>  Hello World !  \xE2\x80\x83\xE2\x80\x84\xE2\x80\x85\xE2\x80\x86  !
> ");
>     // char *teststr = strdup("      !    Hello World !        !  ");
>     size_t teststrlen = strlen(teststr); // byte length, not counting the terminator
>     std::cout << teststrlen << ": \"" << std::string_view(teststr,
> teststrlen) << "\"" << std::endl; // length and contents before trimming
>     mb_trim(&teststrlen, &teststr, trim_lengths_num, trim_lengths, 
> trim_chars); // may replace teststr and update teststrlen
>     std::cout << teststrlen << ": \"" << std::string_view(teststr,
> teststrlen) << "\"" << std::endl; // length and contents after trimming
>     return 0; // NOTE(review): teststr is never freed before returning
> }
>
>
> On Sat, 30 Sept 2023 at 13:16, youkidearitai <youkideari...@gmail.com> wrote:
> >
> > 2023年9月30日(土) 17:42 Saki Takamachi <s...@sakiot.com>:
> > >
> > > > I also want to trim function of multibyte trim functions.
> > >
> > > > I think that in addition to mb_trim,
> > > mb_ltrim and mb_rtrim are also necessary.
> > >
> > > Hi.
> > >
> > > Having a new option besides regex sounds like a good idea for me, as a 
> > > user of a language that benefits from `mb_trim()`.
> > >
> > > Perhaps users are more intuitive about the usefulness of those functions 
> > > who have spent more time cleaning "multibyte spaces".
> >
> > Hi
> >
> > Thanks for reply.
> > I'm trying implements to trim for multibyte function.
> > Please give me some time.
> >
> > If have any idea, feel free to comment to me.
> >
> > Regards
> > Yuya
> >
> > --
> > ---------------------------
> > Yuya Hamada (tekimen)
> > - https://tekitoh-memdhoi.info
> > - https://github.com/youkidearitai
> > -----------------------------
> >
> > --
> > PHP Internals - PHP Runtime Development Mailing List
> > To unsubscribe, visit: https://www.php.net/unsub.php
> >

--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: https://www.php.net/unsub.php

Reply via email to