[PATCH] D31765: Skip Unicode character expansion in assembly files

2017-04-07 Thread Sanne Wouda via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL299754: Skip Unicode character expansion in assembly files 
(authored by sanwou01).

Changed prior to commit:
  https://reviews.llvm.org/D31765?vs=94501&id=94505#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D31765

Files:
  cfe/trunk/lib/Lex/Lexer.cpp
  cfe/trunk/test/Lexer/asm-preproc-no-unicode.s


Index: cfe/trunk/lib/Lex/Lexer.cpp
===
--- cfe/trunk/lib/Lex/Lexer.cpp
+++ cfe/trunk/lib/Lex/Lexer.cpp
@@ -3603,17 +3603,19 @@
 
   // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
-  if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
-  return true; // KeepWhitespaceMode
+if (!LangOpts.AsmPreprocessor) {
+  if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+  if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+return true; // KeepWhitespaceMode
+
+  // We only saw whitespace, so just try again with this lexer.
+  // (We manually eliminate the tail call to avoid recursion.)
+  goto LexNextToken;
+}
 
-// We only saw whitespace, so just try again with this lexer.
-// (We manually eliminate the tail call to avoid recursion.)
-goto LexNextToken;
+return LexUnicode(Result, CodePoint, CurPtr);
   }
-
-  return LexUnicode(Result, CodePoint, CurPtr);
 }
 
 Kind = tok::unknown;
Index: cfe/trunk/test/Lexer/asm-preproc-no-unicode.s
===
--- cfe/trunk/test/Lexer/asm-preproc-no-unicode.s
+++ cfe/trunk/test/Lexer/asm-preproc-no-unicode.s
@@ -0,0 +1,8 @@
+// RUN: %clang -E -xassembler-with-cpp %s -o - 2>&1 | FileCheck %s
+
+// CHECK-NOT: warning: \u used with no following hex digits
+// CHECK: .word \u
+
+.macro foo, u
+.word \u
+.endm


Index: cfe/trunk/lib/Lex/Lexer.cpp
===
--- cfe/trunk/lib/Lex/Lexer.cpp
+++ cfe/trunk/lib/Lex/Lexer.cpp
@@ -3603,17 +3603,19 @@
 
   // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
-  if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
-  return true; // KeepWhitespaceMode
+if (!LangOpts.AsmPreprocessor) {
+  if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+  if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+return true; // KeepWhitespaceMode
+
+  // We only saw whitespace, so just try again with this lexer.
+  // (We manually eliminate the tail call to avoid recursion.)
+  goto LexNextToken;
+}
 
-// We only saw whitespace, so just try again with this lexer.
-// (We manually eliminate the tail call to avoid recursion.)
-goto LexNextToken;
+return LexUnicode(Result, CodePoint, CurPtr);
   }
-
-  return LexUnicode(Result, CodePoint, CurPtr);
 }
 
 Kind = tok::unknown;
Index: cfe/trunk/test/Lexer/asm-preproc-no-unicode.s
===
--- cfe/trunk/test/Lexer/asm-preproc-no-unicode.s
+++ cfe/trunk/test/Lexer/asm-preproc-no-unicode.s
@@ -0,0 +1,8 @@
+// RUN: %clang -E -xassembler-with-cpp %s -o - 2>&1 | FileCheck %s
+
+// CHECK-NOT: warning: \u used with no following hex digits
+// CHECK: .word \u
+
+.macro foo, u
+.word \u
+.endm
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31765: Skip Unicode character expansion in assembly files

2017-04-07 Thread Oliver Stannard via Phabricator via cfe-commits
olista01 accepted this revision.
olista01 added a comment.
This revision is now accepted and ready to land.

LGTM


https://reviews.llvm.org/D31765



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31765: Skip Unicode character expansion in assembly files

2017-04-07 Thread Salman Arif via Phabricator via cfe-commits
salari01 updated this revision to Diff 94501.
salari01 added a comment.

Updated test to check preprocessed output instead of the assembled file. Cannot 
use `-verify` with the driver, but with `-E` and `-o -`, there is no longer a 
need to have the dummy warning to avoid the FileCheck error.


https://reviews.llvm.org/D31765

Files:
  lib/Lex/Lexer.cpp
  test/Lexer/asm-preproc-no-unicode.s


Index: test/Lexer/asm-preproc-no-unicode.s
===
--- /dev/null
+++ test/Lexer/asm-preproc-no-unicode.s
@@ -0,0 +1,8 @@
+// RUN: %clang -E -xassembler-with-cpp %s -o - 2>&1 | FileCheck %s
+
+// CHECK-NOT: warning: \u used with no following hex digits
+// CHECK: .word \u
+
+.macro foo, u
+.word \u
+.endm
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -3603,17 +3603,19 @@
 
   // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
-  if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
-  return true; // KeepWhitespaceMode
+if (!LangOpts.AsmPreprocessor) {
+  if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+  if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+return true; // KeepWhitespaceMode
+
+  // We only saw whitespace, so just try again with this lexer.
+  // (We manually eliminate the tail call to avoid recursion.)
+  goto LexNextToken;
+}
 
-// We only saw whitespace, so just try again with this lexer.
-// (We manually eliminate the tail call to avoid recursion.)
-goto LexNextToken;
+return LexUnicode(Result, CodePoint, CurPtr);
   }
-
-  return LexUnicode(Result, CodePoint, CurPtr);
 }
 
 Kind = tok::unknown;


Index: test/Lexer/asm-preproc-no-unicode.s
===
--- /dev/null
+++ test/Lexer/asm-preproc-no-unicode.s
@@ -0,0 +1,8 @@
+// RUN: %clang -E -xassembler-with-cpp %s -o - 2>&1 | FileCheck %s
+
+// CHECK-NOT: warning: \u used with no following hex digits
+// CHECK: .word \u
+
+.macro foo, u
+.word \u
+.endm
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -3603,17 +3603,19 @@
 
   // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
-  if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
-  return true; // KeepWhitespaceMode
+if (!LangOpts.AsmPreprocessor) {
+  if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+  if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+return true; // KeepWhitespaceMode
+
+  // We only saw whitespace, so just try again with this lexer.
+  // (We manually eliminate the tail call to avoid recursion.)
+  goto LexNextToken;
+}
 
-// We only saw whitespace, so just try again with this lexer.
-// (We manually eliminate the tail call to avoid recursion.)
-goto LexNextToken;
+return LexUnicode(Result, CodePoint, CurPtr);
   }
-
-  return LexUnicode(Result, CodePoint, CurPtr);
 }
 
 Kind = tok::unknown;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31765: Skip Unicode character expansion in assembly files

2017-04-06 Thread Salman Arif via Phabricator via cfe-commits
salari01 created this revision.

When using the C preprocessor with assembly files, either with a capital `S` 
file extension or with `-xassembler-with-cpp`, the lexer now ignores the 
Unicode escape sequence `\u` instead of trying to expand it as a universal 
character name. In assembly, the `\u` pattern can be used for expanding a 
macro argument whose name starts with `u`.


https://reviews.llvm.org/D31765

Files:
  lib/Lex/Lexer.cpp
  test/Lexer/asm-preproc-no-unicode.s


Index: test/Lexer/asm-preproc-no-unicode.s
===
--- /dev/null
+++ test/Lexer/asm-preproc-no-unicode.s
@@ -0,0 +1,13 @@
+// RUN: %clang --target=arm-arm-none-eabi -c -xassembler-with-cpp %s -o %t 2>&1 | FileCheck %s --check-prefix=WARNING
+// RUN: llvm-objdump -s %t | FileCheck %s --check-prefix=DATA
+
+// WARNING-NOT: warning: \u used with no following hex digits
+// DATA: Contents of section data:
+// DATA-NEXT:  efbeadde
+
+.warning  // required to avoid FileCheck empty input error
+.macro foo, u, name
+.section \name, "a", %progbits
+.word \u
+.endm
+foo 0xdeadbeef, data
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -3603,17 +3603,19 @@
 
   // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
-  if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
-  return true; // KeepWhitespaceMode
+if (!LangOpts.AsmPreprocessor) {
+  if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+  if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+return true; // KeepWhitespaceMode
+
+  // We only saw whitespace, so just try again with this lexer.
+  // (We manually eliminate the tail call to avoid recursion.)
+  goto LexNextToken;
+}
 
-// We only saw whitespace, so just try again with this lexer.
-// (We manually eliminate the tail call to avoid recursion.)
-goto LexNextToken;
+return LexUnicode(Result, CodePoint, CurPtr);
   }
-
-  return LexUnicode(Result, CodePoint, CurPtr);
 }
 
 Kind = tok::unknown;


Index: test/Lexer/asm-preproc-no-unicode.s
===
--- /dev/null
+++ test/Lexer/asm-preproc-no-unicode.s
@@ -0,0 +1,13 @@
+// RUN: %clang --target=arm-arm-none-eabi -c -xassembler-with-cpp %s -o %t 2>&1 | FileCheck %s --check-prefix=WARNING
+// RUN: llvm-objdump -s %t | FileCheck %s --check-prefix=DATA
+
+// WARNING-NOT: warning: \u used with no following hex digits
+// DATA: Contents of section data:
+// DATA-NEXT:  efbeadde
+
+.warning  // required to avoid FileCheck empty input error
+.macro foo, u, name
+.section \name, "a", %progbits
+.word \u
+.endm
+foo 0xdeadbeef, data
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -3603,17 +3603,19 @@
 
   // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
-  if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
-  return true; // KeepWhitespaceMode
+if (!LangOpts.AsmPreprocessor) {
+  if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+  if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+return true; // KeepWhitespaceMode
+
+  // We only saw whitespace, so just try again with this lexer.
+  // (We manually eliminate the tail call to avoid recursion.)
+  goto LexNextToken;
+}
 
-// We only saw whitespace, so just try again with this lexer.
-// (We manually eliminate the tail call to avoid recursion.)
-goto LexNextToken;
+return LexUnicode(Result, CodePoint, CurPtr);
   }
-
-  return LexUnicode(Result, CodePoint, CurPtr);
 }
 
 Kind = tok::unknown;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits