salari01 created this revision.

When using the C preprocessor with assembly files, either with a capital `S`
file extension or with `-xassembler-with-cpp`, the lexer now ignores the
Unicode escape sequence `\u` instead of trying to lex it as a universal
character name. In assembly source, the `\u` pattern can be used to expand a
macro argument whose name starts with `u`.


https://reviews.llvm.org/D31765

Files:
  lib/Lex/Lexer.cpp
  test/Lexer/asm-preproc-no-unicode.s


Index: test/Lexer/asm-preproc-no-unicode.s
===================================================================
--- /dev/null
+++ test/Lexer/asm-preproc-no-unicode.s
@@ -0,0 +1,13 @@
+// RUN: %clang --target=arm-arm-none-eabi -c -xassembler-with-cpp %s -o %t 2>&1 | FileCheck %s --check-prefix=WARNING
+// RUN: llvm-objdump -s %t | FileCheck %s --check-prefix=DATA
+
+// WARNING-NOT: warning: \u used with no following hex digits
+// DATA: Contents of section data:
+// DATA-NEXT: 0000 efbeadde
+
+    .warning  // required to avoid FileCheck empty input error
+    .macro foo, u, name
+        .section \name, "a", %progbits
+        .word \u
+    .endm
+    foo 0xdeadbeef, data
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -3603,17 +3603,19 @@
 
   // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-    if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
-      if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-        if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
-          return true; // KeepWhitespaceMode
+    if (!LangOpts.AsmPreprocessor) {
+      if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+        if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+          if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+            return true; // KeepWhitespaceMode
+
+          // We only saw whitespace, so just try again with this lexer.
+          // (We manually eliminate the tail call to avoid recursion.)
+          goto LexNextToken;
+        }
 
-        // We only saw whitespace, so just try again with this lexer.
-        // (We manually eliminate the tail call to avoid recursion.)
-        goto LexNextToken;
+        return LexUnicode(Result, CodePoint, CurPtr);
       }
-
-      return LexUnicode(Result, CodePoint, CurPtr);
     }
 
     Kind = tok::unknown;


Index: test/Lexer/asm-preproc-no-unicode.s
===================================================================
--- /dev/null
+++ test/Lexer/asm-preproc-no-unicode.s
@@ -0,0 +1,13 @@
+// RUN: %clang --target=arm-arm-none-eabi -c -xassembler-with-cpp %s -o %t 2>&1 | FileCheck %s --check-prefix=WARNING
+// RUN: llvm-objdump -s %t | FileCheck %s --check-prefix=DATA
+
+// WARNING-NOT: warning: \u used with no following hex digits
+// DATA: Contents of section data:
+// DATA-NEXT: 0000 efbeadde
+
+    .warning  // required to avoid FileCheck empty input error
+    .macro foo, u, name
+        .section \name, "a", %progbits
+        .word \u
+    .endm
+    foo 0xdeadbeef, data
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -3603,17 +3603,19 @@
 
   // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-    if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
-      if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-        if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
-          return true; // KeepWhitespaceMode
+    if (!LangOpts.AsmPreprocessor) {
+      if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+        if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+          if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+            return true; // KeepWhitespaceMode
+
+          // We only saw whitespace, so just try again with this lexer.
+          // (We manually eliminate the tail call to avoid recursion.)
+          goto LexNextToken;
+        }
 
-        // We only saw whitespace, so just try again with this lexer.
-        // (We manually eliminate the tail call to avoid recursion.)
-        goto LexNextToken;
+        return LexUnicode(Result, CodePoint, CurPtr);
       }
-
-      return LexUnicode(Result, CodePoint, CurPtr);
     }
 
     Kind = tok::unknown;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to