https://github.com/Thibault-Monnier updated https://github.com/llvm/llvm-project/pull/180819
>From faa899a6ce518c1176f2bf59f199eb42e59d840e Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Tue, 10 Feb 2026 19:41:47 +0100 Subject: [PATCH 1/3] Try prioritizing skipping space --- clang/lib/Lex/Lexer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 1498657047bd6..483cca32e08a2 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2533,8 +2533,8 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, // Skip consecutive spaces efficiently. while (true) { - // Skip horizontal whitespace very aggressively. - while (isHorizontalWhitespace(Char)) + // Skip horizontal whitespace, especially space, very aggressively. + while (LLVM_LIKELY(Char == ' ') || isHorizontalWhitespace(Char)) Char = *++CurPtr; // Otherwise if we have something other than whitespace, we're done. @@ -3756,10 +3756,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { const char *CurPtr = BufferPtr; // Small amounts of horizontal whitespace is very common between tokens. - if (isHorizontalWhitespace(*CurPtr)) { + if (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr)) { do { ++CurPtr; - } while (isHorizontalWhitespace(*CurPtr)); + } while (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr)); // If we are keeping whitespace and other tokens, just return what we just // skipped. The next lexer invocation will return the token after the >From 0ddd945fb9fbde93a49747d5ba2e24d39425a752 Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Mon, 23 Feb 2026 10:40:32 +0100 Subject: [PATCH 2/3] Try remove LLVM_LIKELY --- clang/lib/Lex/Lexer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 483cca32e08a2..446a8a6eb7f63 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2534,7 +2534,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, // Skip consecutive spaces efficiently. while (true) { // Skip horizontal whitespace, especially space, very aggressively. - while (LLVM_LIKELY(Char == ' ') || isHorizontalWhitespace(Char)) + while (Char == ' ' || isHorizontalWhitespace(Char)) Char = *++CurPtr; // Otherwise if we have something other than whitespace, we're done. @@ -3756,10 +3756,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { const char *CurPtr = BufferPtr; // Small amounts of horizontal whitespace is very common between tokens. - if (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr)) { + // Check for space character separately to skip the expensive + // isHorizontalWhitespace() check + if (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)) { do { ++CurPtr; - } while (LLVM_LIKELY(*CurPtr == ' ') || isHorizontalWhitespace(*CurPtr)); + } while (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)); // If we are keeping whitespace and other tokens, just return what we just // skipped. The next lexer invocation will return the token after the >From 62e29012c0ade20d916b9c0d8e111b60758d5326 Mon Sep 17 00:00:00 2001 From: Thibault-Monnier <[email protected]> Date: Mon, 23 Feb 2026 13:35:07 +0100 Subject: [PATCH 3/3] Try changing isHorizontalWhitespace directly --- clang/include/clang/Basic/CharInfo.h | 2 +- clang/lib/Lex/Lexer.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h index 87626eeb8a700..1ede0328952d8 100644 --- a/clang/include/clang/Basic/CharInfo.h +++ b/clang/include/clang/Basic/CharInfo.h @@ -90,7 +90,7 @@ LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c, /// Note that this returns false for '\\0'. LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) { using namespace charinfo; - return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0; + return c == ' ' || (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0; } /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 446a8a6eb7f63..4dc2eebdf0e97 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2534,7 +2534,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, // Skip consecutive spaces efficiently. while (true) { // Skip horizontal whitespace, especially space, very aggressively. - while (Char == ' ' || isHorizontalWhitespace(Char)) + while (isHorizontalWhitespace(Char)) Char = *++CurPtr; // Otherwise if we have something other than whitespace, we're done. @@ -3758,10 +3758,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // Small amounts of horizontal whitespace is very common between tokens. // Check for space character separately to skip the expensive // isHorizontalWhitespace() check - if (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)) { + if (isHorizontalWhitespace(*CurPtr)) { do { ++CurPtr; - } while (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)); + } while (isHorizontalWhitespace(*CurPtr)); // If we are keeping whitespace and other tokens, just return what we just // skipped. The next lexer invocation will return the token after the _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
