pkarashchenko commented on code in PR #8802: URL: https://github.com/apache/nuttx/pull/8802#discussion_r1136312153
########## include/iconv.h: ########## @@ -0,0 +1,60 @@ +/******************************************************************************** Review Comment: ```suggestion /**************************************************************************** ``` ########## include/iconv.h: ########## @@ -0,0 +1,60 @@ +/******************************************************************************** + * include/iconv.h + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + ********************************************************************************/ Review Comment: ```suggestion ****************************************************************************/ ``` and same in all similar places ########## libs/libc/locale/lib_iconv.c: ########## @@ -0,0 +1,1423 @@ +/**************************************************************************** + * libs/libc/locale/lib_iconv.c + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <iconv.h> +#include <errno.h> +#include <wchar.h> +#include <string.h> +#include <stdlib.h> +#include <limits.h> +#include <stdint.h> +#include <locale.h> + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#define UTF_32BE 0300 +#define UTF_16LE 0301 +#define UTF_16BE 0302 +#define UTF_32LE 0303 +#define UCS2BE 0304 +#define UCS2LE 0305 +#define WCHAR_T 0306 +#define US_ASCII 0307 +#define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 +#define EUC_JP 0320 +#define SHIFT_JIS 0321 +#define ISO2022_JP 0322 +#define GB18030 0330 +#define GBK 0331 +#define GB2312 0332 +#define BIG5 0340 +#define EUC_KR 0350 + +/**************************************************************************** + * Private Types + ****************************************************************************/ + +struct stateful_cd +{ + iconv_t base_cd; + unsigned state; +}; + +/**************************************************************************** + * Private Data + ****************************************************************************/ + +/* Definitions of g_charmaps. Each charmap consists of: + * 1. Empty-string-terminated list of null-terminated aliases. + * 2. Special type code or number of elided quads of entries. + * 3. Character table (size determined by field 2), consisting + * of 5 bytes for every 4 characters, interpreted as 10-bit + * indices into the g_legacy_chars table. + */ + +static const unsigned char g_charmaps[] = +{ + "utf8\0char\0\0\310" + "wchart\0\0\306" + "ucs2be\0\0\304" + "ucs2le\0\0\305" + "utf16be\0\0\302" + "utf16le\0\0\301" + "ucs4be\0utf32be\0\0\300" + "ucs4le\0utf32le\0\0\303" + "ascii\0usascii\0iso646\0iso646us\0\0\307" + "utf16\0\0\312" + "ucs4\0utf32\0\0\313" + "ucs2\0\0\314" +#ifdef CONFIG_LIBC_LOCALE_JAPANESE + "eucjp\0\0\320" + "shiftjis\0sjis\0\0\321" + "iso2022jp\0\0\322" +#endif +#ifdef CONFIG_LIBC_LOCALE_CHINESE + "gb18030\0\0\330" + "gbk\0\0\331" + "gb2312\0\0\332" + "big5\0bigfive\0cp950\0big5hkscs\0\0\340" +#endif +#ifdef CONFIG_LIBC_LOCALE_KOREAN + "euckr\0ksc5601\0ksx1001\0cp949\0\0\350" +#endif +#ifdef CONFIG_LIBC_LOCALE_CODEPAGES + #include "codepages.h" Review Comment: ```suggestion # include "codepages.h" ``` ########## libs/libc/locale/lib_iconv.c: ########## @@ -0,0 +1,1423 @@ +/**************************************************************************** + * libs/libc/locale/lib_iconv.c + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <iconv.h> +#include <errno.h> +#include <wchar.h> +#include <string.h> +#include <stdlib.h> +#include <limits.h> +#include <stdint.h> +#include <locale.h> + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#define UTF_32BE 0300 +#define UTF_16LE 0301 +#define UTF_16BE 0302 +#define UTF_32LE 0303 +#define UCS2BE 0304 +#define UCS2LE 0305 +#define WCHAR_T 0306 +#define US_ASCII 0307 +#define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 +#define EUC_JP 0320 +#define SHIFT_JIS 0321 +#define ISO2022_JP 0322 +#define GB18030 0330 +#define GBK 0331 +#define GB2312 0332 +#define BIG5 0340 +#define EUC_KR 0350 + +/**************************************************************************** + * Private Types + ****************************************************************************/ + +struct stateful_cd +{ + iconv_t base_cd; + unsigned state; +}; + +/**************************************************************************** + * Private Data + ****************************************************************************/ + +/* Definitions of g_charmaps. Each charmap consists of: + * 1. Empty-string-terminated list of null-terminated aliases. + * 2. Special type code or number of elided quads of entries. + * 3. Character table (size determined by field 2), consisting + * of 5 bytes for every 4 characters, interpreted as 10-bit + * indices into the g_legacy_chars table. + */ + +static const unsigned char g_charmaps[] = +{ + "utf8\0char\0\0\310" + "wchart\0\0\306" + "ucs2be\0\0\304" + "ucs2le\0\0\305" + "utf16be\0\0\302" + "utf16le\0\0\301" + "ucs4be\0utf32be\0\0\300" + "ucs4le\0utf32le\0\0\303" + "ascii\0usascii\0iso646\0iso646us\0\0\307" + "utf16\0\0\312" + "ucs4\0utf32\0\0\313" + "ucs2\0\0\314" +#ifdef CONFIG_LIBC_LOCALE_JAPANESE + "eucjp\0\0\320" + "shiftjis\0sjis\0\0\321" + "iso2022jp\0\0\322" +#endif +#ifdef CONFIG_LIBC_LOCALE_CHINESE + "gb18030\0\0\330" + "gbk\0\0\331" + "gb2312\0\0\332" + "big5\0bigfive\0cp950\0big5hkscs\0\0\340" +#endif +#ifdef CONFIG_LIBC_LOCALE_KOREAN + "euckr\0ksc5601\0ksx1001\0cp949\0\0\350" +#endif +#ifdef CONFIG_LIBC_LOCALE_CODEPAGES + #include "codepages.h" +#endif +}; + +/* Table of characters that appear in legacy 8-bit codepages, + * limited to 1024 slots (10 bit indices). The first 256 entries + * are elided since those characters are obviously all included. + */ + +static const unsigned short g_legacy_chars[] = +{ + #include "legacychars.h" +}; + +#ifdef CONFIG_LIBC_LOCALE_JAPANESE +static const unsigned short g_jis0208[84][94] = +{ + #include "jis0208.h" +}; + +static const unsigned short g_rev_jis[] = +{ + #include "revjis.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_CHINESE +static const unsigned short g_gb18030[126][190] = +{ + #include "gb18030.h" +}; + +static const unsigned short g_big5[89][157] = +{ + #include "big5.h" +}; + +static const unsigned short g_hkscs[] = +{ + #include "hkscs.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_KOREAN +static const unsigned short g_ksc[93][94] = +{ + #include "ksc.h" +}; +#endif + +/**************************************************************************** + * Private Functions + ****************************************************************************/ + +static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b) +{ + for (; *a && *b; a++, b++) + { + while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U) + a++; + + if ((*a | 32U) != *b) + { + return 1; + } + } + + return *a != *b; +} + +static size_t find_charmap(FAR const void *name) +{ + FAR const unsigned char *s; + + if (*(FAR char *)name == 0) Review Comment: ```suggestion if (*(FAR char *)name == '\0') ``` ########## libs/libc/locale/lib_iconv.c: ########## @@ -0,0 +1,1423 @@ +/**************************************************************************** + * libs/libc/locale/lib_iconv.c + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <iconv.h> +#include <errno.h> +#include <wchar.h> +#include <string.h> +#include <stdlib.h> +#include <limits.h> +#include <stdint.h> +#include <locale.h> + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#define UTF_32BE 0300 +#define UTF_16LE 0301 +#define UTF_16BE 0302 +#define UTF_32LE 0303 +#define UCS2BE 0304 +#define UCS2LE 0305 +#define WCHAR_T 0306 +#define US_ASCII 0307 +#define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 +#define EUC_JP 0320 +#define SHIFT_JIS 0321 +#define ISO2022_JP 0322 +#define GB18030 0330 +#define GBK 0331 +#define GB2312 0332 +#define BIG5 0340 +#define EUC_KR 0350 + +/**************************************************************************** + * Private Types + ****************************************************************************/ + +struct stateful_cd +{ + iconv_t base_cd; + unsigned state; +}; + +/**************************************************************************** + * Private Data + ****************************************************************************/ + +/* Definitions of g_charmaps. Each charmap consists of: + * 1. Empty-string-terminated list of null-terminated aliases. + * 2. Special type code or number of elided quads of entries. + * 3. Character table (size determined by field 2), consisting + * of 5 bytes for every 4 characters, interpreted as 10-bit + * indices into the g_legacy_chars table. + */ + +static const unsigned char g_charmaps[] = +{ + "utf8\0char\0\0\310" + "wchart\0\0\306" + "ucs2be\0\0\304" + "ucs2le\0\0\305" + "utf16be\0\0\302" + "utf16le\0\0\301" + "ucs4be\0utf32be\0\0\300" + "ucs4le\0utf32le\0\0\303" + "ascii\0usascii\0iso646\0iso646us\0\0\307" + "utf16\0\0\312" + "ucs4\0utf32\0\0\313" + "ucs2\0\0\314" +#ifdef CONFIG_LIBC_LOCALE_JAPANESE + "eucjp\0\0\320" + "shiftjis\0sjis\0\0\321" + "iso2022jp\0\0\322" +#endif +#ifdef CONFIG_LIBC_LOCALE_CHINESE + "gb18030\0\0\330" + "gbk\0\0\331" + "gb2312\0\0\332" + "big5\0bigfive\0cp950\0big5hkscs\0\0\340" +#endif +#ifdef CONFIG_LIBC_LOCALE_KOREAN + "euckr\0ksc5601\0ksx1001\0cp949\0\0\350" +#endif +#ifdef CONFIG_LIBC_LOCALE_CODEPAGES + #include "codepages.h" +#endif +}; + +/* Table of characters that appear in legacy 8-bit codepages, + * limited to 1024 slots (10 bit indices). The first 256 entries + * are elided since those characters are obviously all included. + */ + +static const unsigned short g_legacy_chars[] = +{ + #include "legacychars.h" +}; + +#ifdef CONFIG_LIBC_LOCALE_JAPANESE +static const unsigned short g_jis0208[84][94] = +{ + #include "jis0208.h" +}; + +static const unsigned short g_rev_jis[] = +{ + #include "revjis.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_CHINESE +static const unsigned short g_gb18030[126][190] = +{ + #include "gb18030.h" +}; + +static const unsigned short g_big5[89][157] = +{ + #include "big5.h" +}; + +static const unsigned short g_hkscs[] = +{ + #include "hkscs.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_KOREAN +static const unsigned short g_ksc[93][94] = +{ + #include "ksc.h" +}; +#endif + +/**************************************************************************** + * Private Functions + ****************************************************************************/ + +static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b) +{ + for (; *a && *b; a++, b++) + { + while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U) + a++; + + if ((*a | 32U) != *b) + { + return 1; + } + } + + return *a != *b; +} + +static size_t find_charmap(FAR const void *name) +{ + FAR const unsigned char *s; + + if (*(FAR char *)name == 0) + { + /* "utf8" */ + + name = g_charmaps; + } + + for (s = g_charmaps; *s; ) + { + if (!fuzzycmp(name, s)) + { + for (; *s; s += strlen((FAR void *)s) + 1); + return s + 1 - g_charmaps; + } + + s += strlen((FAR void *)s)+1; + if (*s == 0) Review Comment: ```suggestion if (*s == '\0') ``` ########## libs/libc/locale/lib_iconv.c: ########## @@ -0,0 +1,1423 @@ +/**************************************************************************** + * libs/libc/locale/lib_iconv.c + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <iconv.h> +#include <errno.h> +#include <wchar.h> +#include <string.h> +#include <stdlib.h> +#include <limits.h> +#include <stdint.h> +#include <locale.h> + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#define UTF_32BE 0300 +#define UTF_16LE 0301 +#define UTF_16BE 0302 +#define UTF_32LE 0303 +#define UCS2BE 0304 +#define UCS2LE 0305 +#define WCHAR_T 0306 +#define US_ASCII 0307 +#define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 +#define EUC_JP 0320 +#define SHIFT_JIS 0321 +#define ISO2022_JP 0322 +#define GB18030 0330 +#define GBK 0331 +#define GB2312 0332 +#define BIG5 0340 +#define EUC_KR 0350 + +/**************************************************************************** + * Private Types + ****************************************************************************/ + +struct stateful_cd +{ + iconv_t base_cd; + unsigned state; +}; + +/**************************************************************************** + * Private Data + ****************************************************************************/ + +/* Definitions of g_charmaps. Each charmap consists of: + * 1. Empty-string-terminated list of null-terminated aliases. + * 2. Special type code or number of elided quads of entries. + * 3. Character table (size determined by field 2), consisting + * of 5 bytes for every 4 characters, interpreted as 10-bit + * indices into the g_legacy_chars table. + */ + +static const unsigned char g_charmaps[] = +{ + "utf8\0char\0\0\310" + "wchart\0\0\306" + "ucs2be\0\0\304" + "ucs2le\0\0\305" + "utf16be\0\0\302" + "utf16le\0\0\301" + "ucs4be\0utf32be\0\0\300" + "ucs4le\0utf32le\0\0\303" + "ascii\0usascii\0iso646\0iso646us\0\0\307" + "utf16\0\0\312" + "ucs4\0utf32\0\0\313" + "ucs2\0\0\314" +#ifdef CONFIG_LIBC_LOCALE_JAPANESE + "eucjp\0\0\320" + "shiftjis\0sjis\0\0\321" + "iso2022jp\0\0\322" +#endif +#ifdef CONFIG_LIBC_LOCALE_CHINESE + "gb18030\0\0\330" + "gbk\0\0\331" + "gb2312\0\0\332" + "big5\0bigfive\0cp950\0big5hkscs\0\0\340" +#endif +#ifdef CONFIG_LIBC_LOCALE_KOREAN + "euckr\0ksc5601\0ksx1001\0cp949\0\0\350" +#endif +#ifdef CONFIG_LIBC_LOCALE_CODEPAGES + #include "codepages.h" +#endif +}; + +/* Table of characters that appear in legacy 8-bit codepages, + * limited to 1024 slots (10 bit indices). The first 256 entries + * are elided since those characters are obviously all included. + */ + +static const unsigned short g_legacy_chars[] = +{ + #include "legacychars.h" +}; + +#ifdef CONFIG_LIBC_LOCALE_JAPANESE +static const unsigned short g_jis0208[84][94] = +{ + #include "jis0208.h" +}; + +static const unsigned short g_rev_jis[] = +{ + #include "revjis.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_CHINESE +static const unsigned short g_gb18030[126][190] = +{ + #include "gb18030.h" +}; + +static const unsigned short g_big5[89][157] = +{ + #include "big5.h" +}; + +static const unsigned short g_hkscs[] = +{ + #include "hkscs.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_KOREAN +static const unsigned short g_ksc[93][94] = +{ + #include "ksc.h" +}; +#endif + +/**************************************************************************** + * Private Functions + ****************************************************************************/ + +static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b) +{ + for (; *a && *b; a++, b++) + { + while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U) + a++; + + if ((*a | 32U) != *b) + { + return 1; + } + } + + return *a != *b; +} + +static size_t find_charmap(FAR const void *name) +{ + FAR const unsigned char *s; + + if (*(FAR char *)name == 0) + { + /* "utf8" */ + + name = g_charmaps; + } + + for (s = g_charmaps; *s; ) + { + if (!fuzzycmp(name, s)) + { + for (; *s; s += strlen((FAR void *)s) + 1); + return s + 1 - g_charmaps; + } + + s += strlen((FAR void *)s)+1; Review Comment: ```suggestion s += strlen((FAR void *)s) + 1; ``` why cast to `void *`? ########## libs/libc/locale/lib_iconv.c: ########## @@ -0,0 +1,1423 @@ +/**************************************************************************** + * libs/libc/locale/lib_iconv.c + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <iconv.h> +#include <errno.h> +#include <wchar.h> +#include <string.h> +#include <stdlib.h> +#include <limits.h> +#include <stdint.h> +#include <locale.h> + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#define UTF_32BE 0300 +#define UTF_16LE 0301 +#define UTF_16BE 0302 +#define UTF_32LE 0303 +#define UCS2BE 0304 +#define UCS2LE 0305 +#define WCHAR_T 0306 +#define US_ASCII 0307 +#define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 +#define EUC_JP 0320 +#define SHIFT_JIS 0321 +#define ISO2022_JP 0322 +#define GB18030 0330 +#define GBK 0331 +#define GB2312 0332 +#define BIG5 0340 +#define EUC_KR 0350 + +/**************************************************************************** + * Private Types + ****************************************************************************/ + +struct stateful_cd +{ + iconv_t base_cd; + unsigned state; +}; + +/**************************************************************************** + * Private Data + ****************************************************************************/ + +/* Definitions of g_charmaps. Each charmap consists of: + * 1. Empty-string-terminated list of null-terminated aliases. + * 2. Special type code or number of elided quads of entries. + * 3. Character table (size determined by field 2), consisting + * of 5 bytes for every 4 characters, interpreted as 10-bit + * indices into the g_legacy_chars table. + */ + +static const unsigned char g_charmaps[] = +{ + "utf8\0char\0\0\310" + "wchart\0\0\306" + "ucs2be\0\0\304" + "ucs2le\0\0\305" + "utf16be\0\0\302" + "utf16le\0\0\301" + "ucs4be\0utf32be\0\0\300" + "ucs4le\0utf32le\0\0\303" + "ascii\0usascii\0iso646\0iso646us\0\0\307" + "utf16\0\0\312" + "ucs4\0utf32\0\0\313" + "ucs2\0\0\314" +#ifdef CONFIG_LIBC_LOCALE_JAPANESE + "eucjp\0\0\320" + "shiftjis\0sjis\0\0\321" + "iso2022jp\0\0\322" +#endif +#ifdef CONFIG_LIBC_LOCALE_CHINESE + "gb18030\0\0\330" + "gbk\0\0\331" + "gb2312\0\0\332" + "big5\0bigfive\0cp950\0big5hkscs\0\0\340" +#endif +#ifdef CONFIG_LIBC_LOCALE_KOREAN + "euckr\0ksc5601\0ksx1001\0cp949\0\0\350" +#endif +#ifdef CONFIG_LIBC_LOCALE_CODEPAGES + #include "codepages.h" +#endif +}; + +/* Table of characters that appear in legacy 8-bit codepages, + * limited to 1024 slots (10 bit indices). The first 256 entries + * are elided since those characters are obviously all included. + */ + +static const unsigned short g_legacy_chars[] = +{ + #include "legacychars.h" +}; + +#ifdef CONFIG_LIBC_LOCALE_JAPANESE +static const unsigned short g_jis0208[84][94] = +{ + #include "jis0208.h" +}; + +static const unsigned short g_rev_jis[] = +{ + #include "revjis.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_CHINESE +static const unsigned short g_gb18030[126][190] = +{ + #include "gb18030.h" +}; + +static const unsigned short g_big5[89][157] = +{ + #include "big5.h" +}; + +static const unsigned short g_hkscs[] = +{ + #include "hkscs.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_KOREAN +static const unsigned short g_ksc[93][94] = +{ + #include "ksc.h" +}; +#endif + +/**************************************************************************** + * Private Functions + ****************************************************************************/ + +static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b) +{ + for (; *a && *b; a++, b++) + { + while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U) + a++; + + if ((*a | 32U) != *b) + { + return 1; + } + } + + return *a != *b; +} + +static size_t find_charmap(FAR const void *name) +{ + FAR const unsigned char *s; + + if (*(FAR char *)name == 0) + { + /* "utf8" */ + + name = g_charmaps; + } + + for (s = g_charmaps; *s; ) + { + if (!fuzzycmp(name, s)) + { + for (; *s; s += strlen((FAR void *)s) + 1); + return s + 1 - g_charmaps; + } + + s += strlen((FAR void *)s)+1; + if (*s == 0) + { + if (s[1] > 0200) + { + s += 2; + } + else + { + s += 2 + (64U - s[1]) * 5; + } + } + } + + return -1; +} + +static iconv_t combine_to_from(size_t t, size_t f) +{ + return (FAR void *)(f << 16 | t << 1 | 1); +} + +static size_t extract_from(iconv_t cd) +{ + return (size_t)cd >> 16; +} + +static size_t extract_to(iconv_t cd) +{ + return (size_t)cd >> 1 & 0x7fff; +} + +static unsigned get_16(FAR const unsigned char *s, int e) +{ + e &= 1; + return s[e] << 8 | s[1 - e]; +} + +static void put_16(FAR unsigned char *s, unsigned c, int e) +{ + e &= 1; + s[e] = c >> 8; + s[1 - e] = c; +} + +static unsigned get_32(FAR const unsigned char *s, int e) +{ + e &= 3; + return (s[e] + 0U) << 24 | s[e ^ 1] << 16 | s[e ^ 2] << 8 | s[e ^ 3]; +} + +static void put_32(FAR unsigned char *s, unsigned c, int e) +{ + e &= 3; + s[e ^ 0] = c >> 24; + s[e ^ 1] = c >> 16; + s[e ^ 2] = c >> 8; + s[e ^ 3] = c; +} + +static unsigned legacy_map(const unsigned char *map, unsigned c) +{ + unsigned x; + + if (c < 4 * map[0 - 1]) + { + return c; + } + + x = c - 4 * map[0 - 1]; + x = (map[x * 5 / 4] >> (2 * x % 8)) | + ((map[x * 5 / 4 + 1] << (8 - 2 * x % 8)) & 1023); + return x < 256 ? x : g_legacy_chars[x - 256]; +} + +#ifdef CONFIG_LIBC_LOCALE_JAPANESE +static unsigned uni_to_jis(unsigned c) +{ + unsigned nel = sizeof(g_rev_jis) / sizeof(*g_rev_jis); Review Comment: we can use `nitems` here ########## libs/libc/locale/lib_iconv.c: ########## @@ -0,0 +1,1423 @@ +/**************************************************************************** + * libs/libc/locale/lib_iconv.c + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <iconv.h> +#include <errno.h> +#include <wchar.h> +#include <string.h> +#include <stdlib.h> +#include <limits.h> +#include <stdint.h> +#include <locale.h> + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#define UTF_32BE 0300 +#define UTF_16LE 0301 +#define UTF_16BE 0302 +#define UTF_32LE 0303 +#define UCS2BE 0304 +#define UCS2LE 0305 +#define WCHAR_T 0306 +#define US_ASCII 0307 +#define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 +#define EUC_JP 0320 +#define SHIFT_JIS 0321 +#define ISO2022_JP 0322 +#define GB18030 0330 +#define GBK 0331 +#define GB2312 0332 +#define BIG5 0340 +#define EUC_KR 0350 + +/**************************************************************************** + * Private Types + ****************************************************************************/ + +struct stateful_cd +{ + iconv_t base_cd; + unsigned state; +}; + +/**************************************************************************** + * Private Data + ****************************************************************************/ + +/* Definitions of g_charmaps. Each charmap consists of: + * 1. Empty-string-terminated list of null-terminated aliases. + * 2. Special type code or number of elided quads of entries. + * 3. Character table (size determined by field 2), consisting + * of 5 bytes for every 4 characters, interpreted as 10-bit + * indices into the g_legacy_chars table. + */ + +static const unsigned char g_charmaps[] = +{ + "utf8\0char\0\0\310" + "wchart\0\0\306" + "ucs2be\0\0\304" + "ucs2le\0\0\305" + "utf16be\0\0\302" + "utf16le\0\0\301" + "ucs4be\0utf32be\0\0\300" + "ucs4le\0utf32le\0\0\303" + "ascii\0usascii\0iso646\0iso646us\0\0\307" + "utf16\0\0\312" + "ucs4\0utf32\0\0\313" + "ucs2\0\0\314" +#ifdef CONFIG_LIBC_LOCALE_JAPANESE + "eucjp\0\0\320" + "shiftjis\0sjis\0\0\321" + "iso2022jp\0\0\322" +#endif +#ifdef CONFIG_LIBC_LOCALE_CHINESE + "gb18030\0\0\330" + "gbk\0\0\331" + "gb2312\0\0\332" + "big5\0bigfive\0cp950\0big5hkscs\0\0\340" +#endif +#ifdef CONFIG_LIBC_LOCALE_KOREAN + "euckr\0ksc5601\0ksx1001\0cp949\0\0\350" +#endif +#ifdef CONFIG_LIBC_LOCALE_CODEPAGES + #include "codepages.h" +#endif +}; + +/* Table of characters that appear in legacy 8-bit codepages, + * limited to 1024 slots (10 bit indices). The first 256 entries + * are elided since those characters are obviously all included. + */ + +static const unsigned short g_legacy_chars[] = +{ + #include "legacychars.h" +}; + +#ifdef CONFIG_LIBC_LOCALE_JAPANESE +static const unsigned short g_jis0208[84][94] = +{ + #include "jis0208.h" +}; + +static const unsigned short g_rev_jis[] = +{ + #include "revjis.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_CHINESE +static const unsigned short g_gb18030[126][190] = +{ + #include "gb18030.h" +}; + +static const unsigned short g_big5[89][157] = +{ + #include "big5.h" +}; + +static const unsigned short g_hkscs[] = +{ + #include "hkscs.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_KOREAN +static const unsigned short g_ksc[93][94] = +{ + #include "ksc.h" +}; +#endif + +/**************************************************************************** + * Private Functions + ****************************************************************************/ + +static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b) +{ + for (; *a && *b; a++, b++) + { + while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U) + a++; + + if ((*a | 32U) != *b) + { + return 1; + } + } + + return *a != *b; +} + +static size_t find_charmap(FAR const void *name) +{ + FAR const unsigned char *s; + + if (*(FAR char *)name == 0) + { + /* "utf8" */ + + name = g_charmaps; + } + + for (s = g_charmaps; *s; ) Review Comment: ```suggestion for (s = g_charmaps; *s != '\0'; ) ``` ########## libs/libc/locale/lib_iconv.c: ########## @@ -0,0 +1,1423 @@ +/**************************************************************************** + * libs/libc/locale/lib_iconv.c + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <iconv.h> +#include <errno.h> +#include <wchar.h> +#include <string.h> +#include <stdlib.h> +#include <limits.h> +#include <stdint.h> +#include <locale.h> + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#define UTF_32BE 0300 +#define UTF_16LE 0301 +#define UTF_16BE 0302 +#define UTF_32LE 0303 +#define UCS2BE 0304 +#define UCS2LE 0305 +#define WCHAR_T 0306 +#define US_ASCII 0307 +#define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 +#define EUC_JP 0320 +#define SHIFT_JIS 0321 +#define ISO2022_JP 0322 +#define GB18030 0330 +#define GBK 0331 +#define GB2312 0332 +#define BIG5 0340 +#define EUC_KR 0350 + +/**************************************************************************** + * Private Types + ****************************************************************************/ + +struct stateful_cd +{ + iconv_t base_cd; + unsigned state; +}; + +/**************************************************************************** + * Private Data + ****************************************************************************/ + +/* Definitions of g_charmaps. Each charmap consists of: + * 1. Empty-string-terminated list of null-terminated aliases. + * 2. Special type code or number of elided quads of entries. + * 3. Character table (size determined by field 2), consisting + * of 5 bytes for every 4 characters, interpreted as 10-bit + * indices into the g_legacy_chars table. + */ + +static const unsigned char g_charmaps[] = +{ + "utf8\0char\0\0\310" + "wchart\0\0\306" + "ucs2be\0\0\304" + "ucs2le\0\0\305" + "utf16be\0\0\302" + "utf16le\0\0\301" + "ucs4be\0utf32be\0\0\300" + "ucs4le\0utf32le\0\0\303" + "ascii\0usascii\0iso646\0iso646us\0\0\307" + "utf16\0\0\312" + "ucs4\0utf32\0\0\313" + "ucs2\0\0\314" +#ifdef CONFIG_LIBC_LOCALE_JAPANESE + "eucjp\0\0\320" + "shiftjis\0sjis\0\0\321" + "iso2022jp\0\0\322" +#endif +#ifdef CONFIG_LIBC_LOCALE_CHINESE + "gb18030\0\0\330" + "gbk\0\0\331" + "gb2312\0\0\332" + "big5\0bigfive\0cp950\0big5hkscs\0\0\340" +#endif +#ifdef CONFIG_LIBC_LOCALE_KOREAN + "euckr\0ksc5601\0ksx1001\0cp949\0\0\350" +#endif +#ifdef CONFIG_LIBC_LOCALE_CODEPAGES + #include "codepages.h" +#endif +}; + +/* Table of characters that appear in legacy 8-bit codepages, + * limited to 1024 slots (10 bit indices). The first 256 entries + * are elided since those characters are obviously all included. + */ + +static const unsigned short g_legacy_chars[] = +{ + #include "legacychars.h" Review Comment: ```suggestion #include "legacychars.h" ``` ########## libs/libc/locale/lib_iconv.c: ########## @@ -0,0 +1,1423 @@ +/**************************************************************************** + * libs/libc/locale/lib_iconv.c + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <iconv.h> +#include <errno.h> +#include <wchar.h> +#include <string.h> +#include <stdlib.h> +#include <limits.h> +#include <stdint.h> +#include <locale.h> + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#define UTF_32BE 0300 +#define UTF_16LE 0301 +#define UTF_16BE 0302 +#define UTF_32LE 0303 +#define UCS2BE 0304 +#define UCS2LE 0305 +#define WCHAR_T 0306 +#define US_ASCII 0307 +#define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 +#define EUC_JP 0320 +#define SHIFT_JIS 0321 +#define ISO2022_JP 0322 +#define GB18030 0330 +#define GBK 0331 +#define GB2312 0332 +#define BIG5 0340 +#define EUC_KR 0350 + +/**************************************************************************** + * Private Types + ****************************************************************************/ + +struct stateful_cd +{ + iconv_t base_cd; + unsigned state; +}; + +/**************************************************************************** + * Private Data + ****************************************************************************/ + +/* Definitions of g_charmaps. Each charmap consists of: + * 1. Empty-string-terminated list of null-terminated aliases. + * 2. Special type code or number of elided quads of entries. + * 3. Character table (size determined by field 2), consisting + * of 5 bytes for every 4 characters, interpreted as 10-bit + * indices into the g_legacy_chars table. + */ + +static const unsigned char g_charmaps[] = +{ + "utf8\0char\0\0\310" + "wchart\0\0\306" + "ucs2be\0\0\304" + "ucs2le\0\0\305" + "utf16be\0\0\302" + "utf16le\0\0\301" + "ucs4be\0utf32be\0\0\300" + "ucs4le\0utf32le\0\0\303" + "ascii\0usascii\0iso646\0iso646us\0\0\307" + "utf16\0\0\312" + "ucs4\0utf32\0\0\313" + "ucs2\0\0\314" +#ifdef CONFIG_LIBC_LOCALE_JAPANESE + "eucjp\0\0\320" + "shiftjis\0sjis\0\0\321" + "iso2022jp\0\0\322" +#endif +#ifdef CONFIG_LIBC_LOCALE_CHINESE + "gb18030\0\0\330" + "gbk\0\0\331" + "gb2312\0\0\332" + "big5\0bigfive\0cp950\0big5hkscs\0\0\340" +#endif +#ifdef CONFIG_LIBC_LOCALE_KOREAN + "euckr\0ksc5601\0ksx1001\0cp949\0\0\350" +#endif +#ifdef CONFIG_LIBC_LOCALE_CODEPAGES + #include "codepages.h" +#endif +}; + +/* Table of characters that appear in legacy 8-bit codepages, + * limited to 1024 slots (10 bit indices). The first 256 entries + * are elided since those characters are obviously all included. + */ + +static const unsigned short g_legacy_chars[] = +{ + #include "legacychars.h" +}; + +#ifdef CONFIG_LIBC_LOCALE_JAPANESE +static const unsigned short g_jis0208[84][94] = +{ + #include "jis0208.h" +}; + +static const unsigned short g_rev_jis[] = +{ + #include "revjis.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_CHINESE +static const unsigned short g_gb18030[126][190] = +{ + #include "gb18030.h" +}; + +static const unsigned short g_big5[89][157] = +{ + #include "big5.h" +}; + +static const unsigned short g_hkscs[] = +{ + #include "hkscs.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_KOREAN +static const unsigned short g_ksc[93][94] = +{ + #include "ksc.h" +}; +#endif + +/**************************************************************************** + * Private Functions + ****************************************************************************/ + +static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b) +{ + for (; *a && *b; a++, b++) + { + while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U) + a++; + + if ((*a | 32U) != *b) + { + return 1; + } + } + + return *a != *b; +} + +static size_t find_charmap(FAR const void *name) +{ + FAR const unsigned char *s; + + if (*(FAR char *)name == 0) + { + /* "utf8" */ + + name = g_charmaps; + } + + for (s = g_charmaps; *s; ) + { + if (!fuzzycmp(name, s)) + { + for (; *s; s += strlen((FAR void *)s) + 1); Review Comment: why cast to `void *`? ########## libs/libc/locale/lib_iconv.c: ########## @@ -0,0 +1,1423 @@ +/**************************************************************************** + * libs/libc/locale/lib_iconv.c + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <iconv.h> +#include <errno.h> +#include <wchar.h> +#include <string.h> +#include <stdlib.h> +#include <limits.h> +#include <stdint.h> +#include <locale.h> + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#define UTF_32BE 0300 +#define UTF_16LE 0301 +#define UTF_16BE 0302 +#define UTF_32LE 0303 +#define UCS2BE 0304 +#define UCS2LE 0305 +#define WCHAR_T 0306 +#define US_ASCII 0307 +#define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 +#define EUC_JP 0320 +#define SHIFT_JIS 0321 +#define ISO2022_JP 0322 +#define GB18030 0330 +#define GBK 0331 +#define GB2312 0332 +#define BIG5 0340 +#define EUC_KR 0350 + +/**************************************************************************** + * Private Types + ****************************************************************************/ + +struct stateful_cd +{ + iconv_t base_cd; + unsigned state; +}; + +/**************************************************************************** + * Private Data + ****************************************************************************/ + +/* Definitions of g_charmaps. Each charmap consists of: + * 1. Empty-string-terminated list of null-terminated aliases. + * 2. Special type code or number of elided quads of entries. + * 3. Character table (size determined by field 2), consisting + * of 5 bytes for every 4 characters, interpreted as 10-bit + * indices into the g_legacy_chars table. + */ + +static const unsigned char g_charmaps[] = +{ + "utf8\0char\0\0\310" + "wchart\0\0\306" + "ucs2be\0\0\304" + "ucs2le\0\0\305" + "utf16be\0\0\302" + "utf16le\0\0\301" + "ucs4be\0utf32be\0\0\300" + "ucs4le\0utf32le\0\0\303" + "ascii\0usascii\0iso646\0iso646us\0\0\307" + "utf16\0\0\312" + "ucs4\0utf32\0\0\313" + "ucs2\0\0\314" +#ifdef CONFIG_LIBC_LOCALE_JAPANESE + "eucjp\0\0\320" + "shiftjis\0sjis\0\0\321" + "iso2022jp\0\0\322" +#endif +#ifdef CONFIG_LIBC_LOCALE_CHINESE + "gb18030\0\0\330" + "gbk\0\0\331" + "gb2312\0\0\332" + "big5\0bigfive\0cp950\0big5hkscs\0\0\340" +#endif +#ifdef CONFIG_LIBC_LOCALE_KOREAN + "euckr\0ksc5601\0ksx1001\0cp949\0\0\350" +#endif +#ifdef CONFIG_LIBC_LOCALE_CODEPAGES + #include "codepages.h" +#endif +}; + +/* Table of characters that appear in legacy 8-bit codepages, + * limited to 1024 slots (10 bit indices). The first 256 entries + * are elided since those characters are obviously all included. + */ + +static const unsigned short g_legacy_chars[] = +{ + #include "legacychars.h" +}; + +#ifdef CONFIG_LIBC_LOCALE_JAPANESE +static const unsigned short g_jis0208[84][94] = +{ + #include "jis0208.h" +}; + +static const unsigned short g_rev_jis[] = +{ + #include "revjis.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_CHINESE +static const unsigned short g_gb18030[126][190] = +{ + #include "gb18030.h" +}; + +static const unsigned short g_big5[89][157] = +{ + #include "big5.h" +}; + +static const unsigned short g_hkscs[] = +{ + #include "hkscs.h" +}; +#endif + +#ifdef CONFIG_LIBC_LOCALE_KOREAN +static const unsigned short g_ksc[93][94] = +{ + #include "ksc.h" +}; +#endif + +/**************************************************************************** + * Private Functions + ****************************************************************************/ + +static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b) +{ + for (; *a && *b; a++, b++) + { + while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U) + a++; + + if ((*a | 32U) != *b) + { + return 1; + } + } + + return *a != *b; +} + +static size_t find_charmap(FAR const void *name) +{ + FAR const unsigned char *s; + + if (*(FAR char *)name == 0) + { + /* "utf8" */ + + name = g_charmaps; + } + + for (s = g_charmaps; *s; ) + { + if (!fuzzycmp(name, s)) + { + for (; *s; s += strlen((FAR void *)s) + 1); + return s + 1 - g_charmaps; + } + + s += strlen((FAR void *)s)+1; + if (*s == 0) + { + if (s[1] > 0200) + { + s += 2; + } + else + { + s += 2 + (64U - s[1]) * 5; + } + } + } + + return -1; +} + +static iconv_t combine_to_from(size_t t, size_t f) +{ + return (FAR void *)(f << 16 | t << 1 | 1); Review Comment: why cast to `void *` and not to `iconv_t`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
