> If you have any ideas, feel free to comment.

I think the C code would look something like this:


// Returns the byte length of the first non-empty trim token that matches the
// start (at_end == false) or the end (at_end == true) of the `len` bytes at
// `data`, or 0 when no token matches.
//
// Zero-length tokens are skipped: memcmp() over zero bytes always compares
// equal, so accepting them would report a "match" that consumes nothing.
static size_t mb_trim_match_len(const char *data, size_t len, bool at_end,
                                size_t trim_lengths_num,
                                const size_t *trim_lengths,
                                const char **trim_chars)
{
    for (size_t i = 0; i < trim_lengths_num; ++i)
    {
        const size_t token_len = trim_lengths[i];
        if (token_len == 0 || token_len > len)
        {
            continue;
        }
        const char *candidate = at_end ? data + (len - token_len) : data;
        if (memcmp(candidate, trim_chars[i], token_len) == 0)
        {
            return token_len;
        }
    }
    return 0;
}

// Strips every leading and trailing occurrence of the given byte sequences
// ("trim tokens") from a heap buffer, in place.
//
// Parameters:
//   strlen            in/out: number of bytes in *str; receives the trimmed
//                     length.
//   str               in/out: buffer to trim. It is handed to realloc(), so it
//                     must come from malloc/realloc/strdup; *str may change.
//   trim_lengths_num  number of entries in trim_lengths and trim_chars.
//   trim_lengths      byte length of each token (entries of 0 are ignored).
//   trim_chars        the tokens; only trim_lengths[i] bytes of each are read.
//
// Tokens are compared bytewise and tried in array order; the first match at
// the current position is removed and the scan restarts. Bytes in the middle
// of the string are never touched.
//
// On return the buffer holds *strlen bytes followed by a '\0'. The buffer is
// resized to *strlen + 1 bytes; if that realloc() fails, the original
// allocation is kept (it is still valid and already holds the trimmed bytes).
// If strlen, str or *str is null the call does nothing.
void mb_trim(size_t *strlen, char **str, const size_t
trim_lengths_num, const size_t *trim_lengths, const char **trim_chars)
{
    if (strlen == nullptr || str == nullptr || *str == nullptr)
    {
        return;
    }

    const size_t original_len = *strlen;
    const char *start = *str;
    size_t remaining = original_len;
    size_t matched = 0;

    // Peel tokens off the front until none matches.
    while ((matched = mb_trim_match_len(start, remaining, false,
                                        trim_lengths_num, trim_lengths,
                                        trim_chars)) != 0)
    {
        start += matched;
        remaining -= matched;
    }

    // Peel tokens off the back until none matches.
    while ((matched = mb_trim_match_len(start, remaining, true,
                                        trim_lengths_num, trim_lengths,
                                        trim_chars)) != 0)
    {
        remaining -= matched;
    }

    // Source and destination may overlap, hence memmove rather than memcpy.
    memmove(*str, start, remaining);

    // The buffer content is already trimmed at this point, so the length is
    // published regardless of whether the resize below succeeds.
    *strlen = remaining;

    // Always request at least one byte: realloc(ptr, 0) is implementation-
    // defined and may free the buffer and return a null pointer.
    char *resized = static_cast<char *>(realloc(*str, remaining + 1));
    if (resized != nullptr)
    {
        resized[remaining] = '\0';
        *str = resized;
    }
    else if (remaining < original_len)
    {
        // Resize failed; the old allocation is untouched and has room for
        // the terminator because at least one byte was trimmed away.
        (*str)[remaining] = '\0';
    }
}

with my simple test code looking like this:

// Small driver for mb_trim(): builds a table of trim tokens, trims a sample
// string that is padded with ASCII and UTF-8 whitespace, and prints the byte
// length and content before and after trimming.
int main()
{
    const char *trim_chars[] = {
        " ",
        "!",
        // utf8 whitespace:
        "\xE2\x80\x80", // EN QUAD
        "\xE2\x80\x81", // EM QUAD
        "\xE2\x80\x82", // EN SPACE
        "\xE2\x80\x83", // EM SPACE
        "\xE2\x80\x84", // THREE-PER-EM SPACE
        "\xE2\x80\x85", // FOUR-PER-EM SPACE
        "\xE2\x80\x86", // SIX-PER-EM SPACE
    };
    // Derive the lengths from the token table so the two cannot drift apart.
    const size_t trim_lengths_num = sizeof(trim_chars) / sizeof(trim_chars[0]);
    size_t trim_lengths[sizeof(trim_chars) / sizeof(trim_chars[0])];
    for (size_t i = 0; i < trim_lengths_num; ++i)
    {
        trim_lengths[i] = strlen(trim_chars[i]);
    }

    // The literal is split into adjacent pieces so that no physical line break
    // falls inside a string literal.
    // NOTE(review): the exact amount of whitespace at the split points was
    // reconstructed; it does not affect the trimmed result.
    char *teststr = strdup(
        "  !  \xE2\x80\x80\xE2\x80\x81\xE2\x80\x82"
        " Hello World !  \xE2\x80\x83\xE2\x80\x84\xE2\x80\x85\xE2\x80\x86  !"
        "  ");
    // char *teststr = strdup("      !    Hello World !        !  ");
    if (teststr == nullptr)
    {
        return 1;
    }
    size_t teststrlen = strlen(teststr);

    std::cout << teststrlen << ": \""
              << std::string_view(teststr, teststrlen) << "\"" << '\n';
    mb_trim(&teststrlen, &teststr, trim_lengths_num, trim_lengths, trim_chars);
    std::cout << teststrlen << ": \""
              << std::string_view(teststr, teststrlen) << "\"" << '\n';

    // The buffer came from strdup() and may have been realloc()ed by mb_trim.
    free(teststr);
    return 0;
}


On Sat, 30 Sept 2023 at 13:16, youkidearitai <youkideari...@gmail.com> wrote:
>
> 2023年9月30日(土) 17:42 Saki Takamachi <s...@sakiot.com>:
> >
> > > I also want to trim function of multibyte trim functions.
> >
> > > I think that in addition to mb_trim,
> > mb_ltrim and mb_rtrim are also necessary.
> >
> > Hi.
> >
> > Having a new option besides regex sounds like a good idea for me, as a user 
> > of a language that benefits from `mb_trim()`.
> >
> > Perhaps users are more intuitive about the usefulness of those functions 
> > who have spent more time cleaning "multibyte spaces".
>
> Hi
>
> Thanks for reply.
> I'm trying to implement a trim function for multibyte strings.
> Please give me some time.
>
> If you have any ideas, feel free to comment.
>
> Regards
> Yuya
>
> --
> ---------------------------
> Yuya Hamada (tekimen)
> - https://tekitoh-memdhoi.info
> - https://github.com/youkidearitai
> -----------------------------
>
> --
> PHP Internals - PHP Runtime Development Mailing List
> To unsubscribe, visit: https://www.php.net/unsub.php
>

--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: https://www.php.net/unsub.php

Reply via email to