Unicode does not support code points above 0x10FFFF because they are unrepresentable in UTF-16.
Signed-off-by: Ben Boeckel <ben.boec...@kitware.com> --- libcpp/ChangeLog | 6 ++++++ libcpp/charset.cc | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 18d5bcceaf0..4d707277531 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,9 @@ +2022-10-27 Ben Boeckel <ben.boec...@kitware.com> + + * charset.cc: Reject encodings of codepoints above 0x10FFFF. + UTF-16 does not support such codepoints and therefore all Unicode + rejects such values. + 2022-10-19 Lewis Hyatt <lhy...@gmail.com> * include/cpplib.h (struct cpp_string): Use new "string_length" GTY. diff --git a/libcpp/charset.cc b/libcpp/charset.cc index 12a398e7527..e9da6674b5f 100644 --- a/libcpp/charset.cc +++ b/libcpp/charset.cc @@ -216,7 +216,7 @@ one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp, if (c <= 0x3FFFFFF && nbytes > 5) return EILSEQ; /* Make sure the character is valid. */ - if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF)) return EILSEQ; + if (c > 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF)) return EILSEQ; *cp = c; *inbufp = inbuf; @@ -320,7 +320,7 @@ one_utf32_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp, s += inbuf[bigend ? 2 : 1] << 8; s += inbuf[bigend ? 3 : 0]; - if (s >= 0x7FFFFFFF || (s >= 0xD800 && s <= 0xDFFF)) + if (s > 0x10FFFF || (s >= 0xD800 && s <= 0xDFFF)) return EILSEQ; rval = one_cppchar_to_utf8 (s, outbufp, outbytesleftp); -- 2.37.3