[PATCH] D31765: Skip Unicode character expansion in assembly files
This revision was automatically updated to reflect the committed changes. Closed by commit rL299754: Skip Unicode character expansion in assembly files (authored by sanwou01). Changed prior to commit: https://reviews.llvm.org/D31765?vs=94501&id=94505#toc Repository: rL LLVM https://reviews.llvm.org/D31765 Files: cfe/trunk/lib/Lex/Lexer.cpp cfe/trunk/test/Lexer/asm-preproc-no-unicode.s Index: cfe/trunk/lib/Lex/Lexer.cpp === --- cfe/trunk/lib/Lex/Lexer.cpp +++ cfe/trunk/lib/Lex/Lexer.cpp @@ -3603,17 +3603,19 @@ // UCNs (C99 6.4.3, C++11 [lex.charset]p2) case '\\': -if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { - if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { -if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) - return true; // KeepWhitespaceMode +if (!LangOpts.AsmPreprocessor) { + if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { +if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; +} -// We only saw whitespace, so just try again with this lexer. -// (We manually eliminate the tail call to avoid recursion.) 
-goto LexNextToken; +return LexUnicode(Result, CodePoint, CurPtr); } - - return LexUnicode(Result, CodePoint, CurPtr); } Kind = tok::unknown; Index: cfe/trunk/test/Lexer/asm-preproc-no-unicode.s === --- cfe/trunk/test/Lexer/asm-preproc-no-unicode.s +++ cfe/trunk/test/Lexer/asm-preproc-no-unicode.s @@ -0,0 +1,8 @@ +// RUN: %clang -E -xassembler-with-cpp %s -o - 2>&1 | FileCheck %s + +// CHECK-NOT: warning: \u used with no following hex digits +// CHECK: .word \u + +.macro foo, u +.word \u +.endm Index: cfe/trunk/lib/Lex/Lexer.cpp === --- cfe/trunk/lib/Lex/Lexer.cpp +++ cfe/trunk/lib/Lex/Lexer.cpp @@ -3603,17 +3603,19 @@ // UCNs (C99 6.4.3, C++11 [lex.charset]p2) case '\\': -if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { - if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { -if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) - return true; // KeepWhitespaceMode +if (!LangOpts.AsmPreprocessor) { + if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { +if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; +} -// We only saw whitespace, so just try again with this lexer. -// (We manually eliminate the tail call to avoid recursion.) 
-goto LexNextToken; +return LexUnicode(Result, CodePoint, CurPtr); } - - return LexUnicode(Result, CodePoint, CurPtr); } Kind = tok::unknown; Index: cfe/trunk/test/Lexer/asm-preproc-no-unicode.s === --- cfe/trunk/test/Lexer/asm-preproc-no-unicode.s +++ cfe/trunk/test/Lexer/asm-preproc-no-unicode.s @@ -0,0 +1,8 @@ +// RUN: %clang -E -xassembler-with-cpp %s -o - 2>&1 | FileCheck %s + +// CHECK-NOT: warning: \u used with no following hex digits +// CHECK: .word \u + +.macro foo, u +.word \u +.endm ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31765: Skip Unicode character expansion in assembly files
olista01 accepted this revision. olista01 added a comment. This revision is now accepted and ready to land. LGTM https://reviews.llvm.org/D31765 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31765: Skip Unicode character expansion in assembly files
salari01 updated this revision to Diff 94501. salari01 added a comment. Updated test to check preprocessed output instead of the assembled file. Cannot use `-verify` with the driver, but with `-E` and `-o -`, there is no longer a need to have the dummy warning to avoid the FileCheck error. https://reviews.llvm.org/D31765 Files: lib/Lex/Lexer.cpp test/Lexer/asm-preproc-no-unicode.s Index: test/Lexer/asm-preproc-no-unicode.s === --- /dev/null +++ test/Lexer/asm-preproc-no-unicode.s @@ -0,0 +1,8 @@ +// RUN: %clang -E -xassembler-with-cpp %s -o - 2>&1 | FileCheck %s + +// CHECK-NOT: warning: \u used with no following hex digits +// CHECK: .word \u + +.macro foo, u +.word \u +.endm Index: lib/Lex/Lexer.cpp === --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -3603,17 +3603,19 @@ // UCNs (C99 6.4.3, C++11 [lex.charset]p2) case '\\': -if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { - if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { -if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) - return true; // KeepWhitespaceMode +if (!LangOpts.AsmPreprocessor) { + if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { +if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; +} -// We only saw whitespace, so just try again with this lexer. -// (We manually eliminate the tail call to avoid recursion.) 
-goto LexNextToken; +return LexUnicode(Result, CodePoint, CurPtr); } - - return LexUnicode(Result, CodePoint, CurPtr); } Kind = tok::unknown; Index: test/Lexer/asm-preproc-no-unicode.s === --- /dev/null +++ test/Lexer/asm-preproc-no-unicode.s @@ -0,0 +1,8 @@ +// RUN: %clang -E -xassembler-with-cpp %s -o - 2>&1 | FileCheck %s + +// CHECK-NOT: warning: \u used with no following hex digits +// CHECK: .word \u + +.macro foo, u +.word \u +.endm Index: lib/Lex/Lexer.cpp === --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -3603,17 +3603,19 @@ // UCNs (C99 6.4.3, C++11 [lex.charset]p2) case '\\': -if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { - if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { -if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) - return true; // KeepWhitespaceMode +if (!LangOpts.AsmPreprocessor) { + if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { +if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; +} -// We only saw whitespace, so just try again with this lexer. -// (We manually eliminate the tail call to avoid recursion.) -goto LexNextToken; +return LexUnicode(Result, CodePoint, CurPtr); } - - return LexUnicode(Result, CodePoint, CurPtr); } Kind = tok::unknown; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31765: Skip Unicode character expansion in assembly files
salari01 created this revision. When the C preprocessor is run on assembly files, either because of a capital `S` file extension or because of `-xassembler-with-cpp`, the lexer no longer treats `\u` as the start of a Unicode escape sequence (universal character name). In assembly, the `\u` pattern can legitimately appear when expanding a macro argument whose name starts with `u`, so it must be passed through unchanged and without a warning. https://reviews.llvm.org/D31765 Files: lib/Lex/Lexer.cpp test/Lexer/asm-preproc-no-unicode.s Index: test/Lexer/asm-preproc-no-unicode.s === --- /dev/null +++ test/Lexer/asm-preproc-no-unicode.s @@ -0,0 +1,13 @@ +// RUN: %clang --target=arm-arm-none-eabi -c -xassembler-with-cpp %s -o %t 2>&1 | FileCheck %s --check-prefix=WARNING +// RUN: llvm-objdump -s %t | FileCheck %s --check-prefix=DATA + +// WARNING-NOT: warning: \u used with no following hex digits +// DATA: Contents of section data: +// DATA-NEXT: efbeadde + +.warning // required to avoid FileCheck empty input error +.macro foo, u, name +.section \name, "a", %progbits +.word \u +.endm +foo 0xdeadbeef, data Index: lib/Lex/Lexer.cpp === --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -3603,17 +3603,19 @@ // UCNs (C99 6.4.3, C++11 [lex.charset]p2) case '\\': -if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { - if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { -if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) - return true; // KeepWhitespaceMode +if (!LangOpts.AsmPreprocessor) { + if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { +if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; +} -// We only saw whitespace, so just try again with this lexer. -// (We manually eliminate the tail call to avoid recursion.) 
-goto LexNextToken; +return LexUnicode(Result, CodePoint, CurPtr); } - - return LexUnicode(Result, CodePoint, CurPtr); } Kind = tok::unknown; Index: test/Lexer/asm-preproc-no-unicode.s === --- /dev/null +++ test/Lexer/asm-preproc-no-unicode.s @@ -0,0 +1,13 @@ +// RUN: %clang --target=arm-arm-none-eabi -c -xassembler-with-cpp %s -o %t 2>&1 | FileCheck %s --check-prefix=WARNING +// RUN: llvm-objdump -s %t | FileCheck %s --check-prefix=DATA + +// WARNING-NOT: warning: \u used with no following hex digits +// DATA: Contents of section data: +// DATA-NEXT: efbeadde + +.warning // required to avoid FileCheck empty input error +.macro foo, u, name +.section \name, "a", %progbits +.word \u +.endm +foo 0xdeadbeef, data Index: lib/Lex/Lexer.cpp === --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -3603,17 +3603,19 @@ // UCNs (C99 6.4.3, C++11 [lex.charset]p2) case '\\': -if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { - if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { -if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) - return true; // KeepWhitespaceMode +if (!LangOpts.AsmPreprocessor) { + if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { +if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; +} -// We only saw whitespace, so just try again with this lexer. -// (We manually eliminate the tail call to avoid recursion.) -goto LexNextToken; +return LexUnicode(Result, CodePoint, CurPtr); } - - return LexUnicode(Result, CodePoint, CurPtr); } Kind = tok::unknown; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits