Author: Thibault Monnier
Date: 2026-01-07T20:29:22+01:00
New Revision: 961d52f3304700393b1b03a72c9ad5bb155e851b

URL: 
https://github.com/llvm/llvm-project/commit/961d52f3304700393b1b03a72c9ad5bb155e851b
DIFF: 
https://github.com/llvm/llvm-project/commit/961d52f3304700393b1b03a72c9ad5bb155e851b.diff

LOG: [Clang] [Lexer] Detect SSE4.2 availability at runtime in fastParseASCIIIdentifier (#171914)

This change attempts to maximize usage of the SSE fast path in
`fastParseASCIIIdentifier`.

If compiling for x86, we compile both the SSE fast path and the scalar loop. At
runtime, we check whether SSE4.2 is available and dispatch to the right
function by using the `target` attribute. If it _is_ available, this
allows a net performance improvement. Otherwise, there's a very slight
but negligible regression, which I believe is perfectly reasonable for a
processor that does not support SSE4.2.

If we are not compiling for x86, then the behavior is exactly the same,
ensuring we have no regressions. If the binary is compiled for x86 with SSE4.2
enabled, we still do a runtime check, but this has negligible impact;
furthermore, the point of the PR is that this is rarely the case.

The benchmark results are available at
[llvm-compile-time-tracker](https://llvm-compile-time-tracker.com/compare.php?from=f88d060c4176d17df56587a083944637ca865cb3&to=d5485438edd460892bf210916827e0d92fc24065&stat=instructions%3Au).

Added: 
    

Modified: 
    clang/lib/Lex/Lexer.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index afebef0974016..5e8ed5f815c7b 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Support/NativeFormatting.h"
 #include "llvm/Support/Unicode.h"
 #include "llvm/Support/UnicodeCharRanges.h"
+
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -45,7 +46,7 @@
 #include <optional>
 #include <string>
 
-#ifdef __SSE4_2__
+#if defined(__i386__) || defined(__x86_64__)
 #include <nmmintrin.h>
 #endif
 
@@ -1919,10 +1920,21 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, 
uint32_t C,
   return true;
 }
 
-static const char *
-fastParseASCIIIdentifier(const char *CurPtr,
-                         [[maybe_unused]] const char *BufferEnd) {
-#ifdef __SSE4_2__
+static const char *fastParseASCIIIdentifierScalar(const char *CurPtr) {
+  unsigned char C = *CurPtr;
+  while (isAsciiIdentifierContinue(C))
+    C = *++CurPtr;
+  return CurPtr;
+}
+
+// Fast path for lexing ASCII identifiers using SSE4.2 instructions.
+// Only enabled on x86/x86_64 when building with a compiler that supports
+// the 'target' attribute, which is used for runtime dispatch. Otherwise, we
+// fall back to the scalar implementation.
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) &&  
\
+    __has_attribute(target) && !defined(_MSC_VER)
+__attribute__((target("sse4.2"))) static const char *
+fastParseASCIIIdentifierSSE42(const char *CurPtr, const char *BufferEnd) {
   alignas(16) static constexpr char AsciiIdentifierRange[16] = {
       '_', '_', 'A', 'Z', 'a', 'z', '0', '9',
   };
@@ -1942,12 +1954,20 @@ fastParseASCIIIdentifier(const char *CurPtr,
       continue;
     return CurPtr;
   }
-#endif
 
-  unsigned char C = *CurPtr;
-  while (isAsciiIdentifierContinue(C))
-    C = *++CurPtr;
-  return CurPtr;
+  return fastParseASCIIIdentifierScalar(CurPtr);
+}
+
+__attribute__((target("sse4.2"))) static const char *
+fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd) {
+  return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
+}
+
+__attribute__((target("default")))
+#endif
+static const char *fastParseASCIIIdentifier(const char *CurPtr,
+                                            const char *BufferEnd) {
+  return fastParseASCIIIdentifierScalar(CurPtr);
 }
 
 bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to