modocache created this revision. modocache added reviewers: chandlerc, rsmith. Herald added a subscriber: jdoerfert. Herald added a project: clang.
On Twitter @LunarLambda pointed out that Clang allows Hangul whitespace Unicode characters in identifiers, which allows users to write very confusing programs: https://twitter.com/LunarLambda/status/1110097030423240705 Clang warns about similar whitespace Unicode characters. Add the Hangul half-width and full-width whitespace characters to the set that Clang warns about. N.B.: Clang warns about the Japanese space character `<U+3000>`, but in a different way, because that character is not a valid identifier character according to the C++11 standard. So Clang emits a warning that it will treat the Japanese `<U+3000>` as whitespace. This is different from the Korean Hangul whitespace character, which is a valid identifier character according to the C++11 standard. For this reason, Clang warns that the character will be treated as an identifier character, not as a whitespace character -- so in sum, Clang's behavior is slightly different for the Japanese whitespace character compared to the Korean Hangul one. 
Repository: rC Clang https://reviews.llvm.org/D59765 Files: lib/Lex/Lexer.cpp test/Lexer/unicode.c Index: test/Lexer/unicode.c =================================================================== --- test/Lexer/unicode.c +++ test/Lexer/unicode.c @@ -39,10 +39,12 @@ // expected-warning@-1 {{treating Unicode character <U+037E> as identifier character rather than as ';' symbol}} int v=[=](auto){return~x;}(); // expected-warning 12{{treating Unicode character}} -int xx; +int xxxㅤᅠ; // expected-warning@-1 {{identifier contains Unicode character <U+2060> that is invisible in some environments}} // expected-warning@-2 {{identifier contains Unicode character <U+FEFF> that is invisible in some environments}} // expected-warning@-3 {{identifier contains Unicode character <U+200D> that is invisible in some environments}} +// expected-warning@-4 {{identifier contains Unicode character <U+3164> that is invisible in some environments}} +// expected-warning@-5 {{identifier contains Unicode character <U+FFA0> that is invisible in some environments}} int foobar = 0; // expected-warning {{identifier contains Unicode character <U+200B> that is invisible in some environments}} int x = foobar; // expected-error {{undeclared identifier}} Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -1528,6 +1528,7 @@ {U'\u2227', '^'}, // LOGICAL AND {U'\u2236', ':'}, // RATIO {U'\u223c', '~'}, // TILDE OPERATOR + {U'\u3164', 0}, // HANGUL FILLER {U'\ua789', ':'}, // MODIFIER LETTER COLON {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK @@ -1558,6 +1559,7 @@ {U'\uff5c', '|'}, // FULLWIDTH VERTICAL LINE {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET {U'\uff5e', '~'}, // FULLWIDTH TILDE + {U'\uffa0', 0}, // HALFWIDTH HANGUL FILLER {0, 0} }; auto Homoglyph =
Index: test/Lexer/unicode.c =================================================================== --- test/Lexer/unicode.c +++ test/Lexer/unicode.c @@ -39,10 +39,12 @@ // expected-warning@-1 {{treating Unicode character <U+037E> as identifier character rather than as ';' symbol}} int v=[=](auto){return~x;}(); // expected-warning 12{{treating Unicode character}} -int xx; +int xxxㅤᅠ; // expected-warning@-1 {{identifier contains Unicode character <U+2060> that is invisible in some environments}} // expected-warning@-2 {{identifier contains Unicode character <U+FEFF> that is invisible in some environments}} // expected-warning@-3 {{identifier contains Unicode character <U+200D> that is invisible in some environments}} +// expected-warning@-4 {{identifier contains Unicode character <U+3164> that is invisible in some environments}} +// expected-warning@-5 {{identifier contains Unicode character <U+FFA0> that is invisible in some environments}} int foobar = 0; // expected-warning {{identifier contains Unicode character <U+200B> that is invisible in some environments}} int x = foobar; // expected-error {{undeclared identifier}} Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -1528,6 +1528,7 @@ {U'\u2227', '^'}, // LOGICAL AND {U'\u2236', ':'}, // RATIO {U'\u223c', '~'}, // TILDE OPERATOR + {U'\u3164', 0}, // HANGUL FILLER {U'\ua789', ':'}, // MODIFIER LETTER COLON {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK @@ -1558,6 +1559,7 @@ {U'\uff5c', '|'}, // FULLWIDTH VERTICAL LINE {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET {U'\uff5e', '~'}, // FULLWIDTH TILDE + {U'\uffa0', 0}, // HALFWIDTH HANGUL FILLER {0, 0} }; auto Homoglyph =
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits