Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com>, Timm =?utf-8?q?Bäder?= <tbae...@redhat.com> Message-ID: In-Reply-To: <llvm.org/llvm/llvm-project/pull/66...@github.com>
https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/66514 >From 001149c81ddeca2488597ebae3604efeaee1c490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 15 Sep 2023 15:51:39 +0200 Subject: [PATCH 01/21] [clang][Diagnostics] Highlight code snippets Add some primitive syntax highlighting to our code snippet output. --- .../clang/Frontend/CodeSnippetHighlighter.h | 46 +++++++ clang/include/clang/Frontend/TextDiagnostic.h | 2 + clang/lib/Frontend/CMakeLists.txt | 1 + clang/lib/Frontend/CodeSnippetHighlighter.cpp | 120 ++++++++++++++++++ clang/lib/Frontend/TextDiagnostic.cpp | 26 ++++ 5 files changed, 195 insertions(+) create mode 100644 clang/include/clang/Frontend/CodeSnippetHighlighter.h create mode 100644 clang/lib/Frontend/CodeSnippetHighlighter.cpp diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h new file mode 100644 index 000000000000000..776954b59e2e1a8 --- /dev/null +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -0,0 +1,46 @@ +//===--- CodeSnippetHighlighter.h - Code snippet highlighting ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H +#define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H + +#include "clang/Basic/LangOptions.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/raw_ostream.h" +#include <vector> + +namespace clang { + +struct StyleRange { + unsigned Start; + unsigned End; + const enum llvm::raw_ostream::Colors c; +}; + +class CodeSnippetHighlighter final { +public: + CodeSnippetHighlighter() = default; + + /// Produce StyleRanges for the given line. + /// The returned vector contains non-overlapping style ranges. They are sorted + /// from beginning of the line to the end. + std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine, + const LangOptions &LangOpts); + +private: + bool Initialized = false; + /// Fills Keywords and Literals. + void ensureTokenData(); + + llvm::SmallSet<StringRef, 12> Keywords; + llvm::SmallSet<StringRef, 12> Literals; +}; + +} // namespace clang + +#endif diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 7eb0ab0cdc9bca8..59fd4d4f9408d48 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -15,6 +15,7 @@ #ifndef LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H #define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H +#include "clang/Frontend/CodeSnippetHighlighter.h" #include "clang/Frontend/DiagnosticRenderer.h" namespace clang { @@ -33,6 +34,7 @@ namespace clang { /// printing coming out of libclang. class TextDiagnostic : public DiagnosticRenderer { raw_ostream &OS; + CodeSnippetHighlighter SnippetHighlighter; public: TextDiagnostic(raw_ostream &OS, diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt index 1e5f0a859dfd568..f3547f771593093 100644 --- a/clang/lib/Frontend/CMakeLists.txt +++ b/clang/lib/Frontend/CMakeLists.txt @@ -42,6 +42,7 @@ add_clang_library(clangFrontend TextDiagnosticPrinter.cpp VerifyDiagnosticConsumer.cpp InterfaceStubFunctionsConsumer.cpp + CodeSnippetHighlighter.cpp DEPENDS ClangDriverOptions diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp new file mode 100644 index 000000000000000..829a533ad2692e5 --- /dev/null +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -0,0 +1,120 @@ + +#include "clang/Frontend/CodeSnippetHighlighter.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; + +void CodeSnippetHighlighter::ensureTokenData() { + if (Initialized) + return; + + // List of keywords, literals and types we want to highlight. + // These are best-effort, as is everything we do wrt. highlighting. + Keywords.insert("_Static_assert"); + Keywords.insert("auto"); + Keywords.insert("concept"); + Keywords.insert("const"); + Keywords.insert("consteval"); + Keywords.insert("constexpr"); + Keywords.insert("delete"); + Keywords.insert("do"); + Keywords.insert("else"); + Keywords.insert("final"); + Keywords.insert("for"); + Keywords.insert("if"); + Keywords.insert("mutable"); + Keywords.insert("namespace"); + Keywords.insert("new"); + Keywords.insert("private"); + Keywords.insert("public"); + Keywords.insert("requires"); + Keywords.insert("return"); + Keywords.insert("static"); + Keywords.insert("static_assert"); + Keywords.insert("using"); + Keywords.insert("void"); + Keywords.insert("volatile"); + Keywords.insert("while"); + + // Builtin types we highlight + Keywords.insert("void"); + Keywords.insert("char"); + Keywords.insert("short"); + Keywords.insert("int"); + Keywords.insert("unsigned"); + Keywords.insert("long"); + Keywords.insert("float"); + Keywords.insert("double"); + + Literals.insert("true"); + Literals.insert("false"); + Literals.insert("nullptr"); + + Initialized = true; +} + +static SourceManager createTempSourceManager() { + FileSystemOptions FileOpts; + FileManager FileMgr(FileOpts); + llvm::IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs()); + llvm::IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions()); + DiagnosticsEngine diags(DiagIDs, DiagOpts); + return SourceManager(diags, FileMgr); +} + +static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM, + const LangOptions &LangOpts) { + return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts); +} + +std::vector<StyleRange> +CodeSnippetHighlighter::highlightLine(StringRef SourceLine, + const LangOptions &LangOpts) { + ensureTokenData(); + + constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; + constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; + constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW; + + const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine); + SourceManager FakeSM = createTempSourceManager(); + Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts); + L.SetKeepWhitespaceMode(true); + + std::vector<StyleRange> Styles; + bool Stop = false; + while (!Stop) { + Token tok; + Stop = L.LexFromRawLexer(tok); + if (tok.is(tok::unknown)) + continue; + + bool Invalid; + unsigned Start = + FakeSM.getSpellingColumnNumber(tok.getLocation(), &Invalid) - 1; + if (Invalid) + continue; + + if (tok.is(tok::raw_identifier)) { + // Almost everything we lex is an identifier, since we use a raw lexer. + // Some should be highlightes as literals, others as keywords. + if (Keywords.contains(tok.getRawIdentifier())) + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), KeywordColor}); + else if (Literals.contains(tok.getRawIdentifier())) + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), LiteralColor}); + } else if (tok::isLiteral(tok.getKind())) { + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), LiteralColor}); + } else if (tok.is(tok::comment)) { + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), CommentColor}); + } + } + + return Styles; +} diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 779dead5d058d1a..13d7d1e048cf991 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -11,6 +11,7 @@ #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" +#include "clang/Frontend/CodeSnippetHighlighter.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -1278,6 +1279,9 @@ void TextDiagnostic::emitSnippetAndCaret( void TextDiagnostic::emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, unsigned LineNo) { + std::vector<StyleRange> Styles = + SnippetHighlighter.highlightLine(SourceLine, LangOpts); + // Emit line number. if (MaxLineNoDisplayWidth > 0) { unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo); @@ -1287,11 +1291,33 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, // Print the source line one character at a time. bool PrintReversed = false; + bool HighlightingEnabled = DiagOpts->ShowColors; size_t I = 0; while (I < SourceLine.size()) { auto [Str, WasPrintable] = printableTextForNextCharacter(SourceLine, &I, DiagOpts->TabStop); + // Just stop highlighting anything for this line if we found a non-printable + // character. + if (!WasPrintable) + HighlightingEnabled = false; + + // FIXME: I hope we can do this in some nicer way. + if (HighlightingEnabled) { + std::optional<enum raw_ostream::Colors> H; + for (auto &P : Styles) { + if (P.Start < I && P.End >= I) { + H = P.c; + break; + } + } + + if (H) { + OS.changeColor(*H, false); + } else + OS.resetColor(); + } + // Toggle inverted colors on or off for this character. if (DiagOpts->ShowColors) { if (WasPrintable == PrintReversed) { >From 50c83be2eed0f59112c96841c9cc077deb4e3bb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Wed, 20 Sep 2023 15:28:10 +0200 Subject: [PATCH 02/21] Get identifier table from Preprocessor --- .../clang/Frontend/CodeSnippetHighlighter.h | 11 +- clang/include/clang/Frontend/TextDiagnostic.h | 7 +- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 101 +++++------------- clang/lib/Frontend/TextDiagnostic.cpp | 8 +- clang/lib/Frontend/TextDiagnosticPrinter.cpp | 2 +- 5 files changed, 39 insertions(+), 90 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index 776954b59e2e1a8..ec03375221f9ffc 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -22,6 +22,8 @@ struct StyleRange { const enum llvm::raw_ostream::Colors c; }; +class Preprocessor; + class CodeSnippetHighlighter final { public: CodeSnippetHighlighter() = default; @@ -30,15 +32,8 @@ class CodeSnippetHighlighter final { /// The returned vector contains non-overlapping style ranges. They are sorted /// from beginning of the line to the end. std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine, + const Preprocessor *PP, const LangOptions &LangOpts); - -private: - bool Initialized = false; - /// Fills Keywords and Literals. - void ensureTokenData(); - - llvm::SmallSet<StringRef, 12> Keywords; - llvm::SmallSet<StringRef, 12> Literals; }; } // namespace clang diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 59fd4d4f9408d48..8cdb9b141a8a4af 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -19,7 +19,6 @@ #include "clang/Frontend/DiagnosticRenderer.h" namespace clang { - /// Class to encapsulate the logic for formatting and printing a textual /// diagnostic message. /// @@ -34,12 +33,12 @@ namespace clang { /// printing coming out of libclang. class TextDiagnostic : public DiagnosticRenderer { raw_ostream &OS; + const Preprocessor *PP; CodeSnippetHighlighter SnippetHighlighter; public: - TextDiagnostic(raw_ostream &OS, - const LangOptions &LangOpts, - DiagnosticOptions *DiagOpts); + TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, + const Preprocessor *PP, DiagnosticOptions *DiagOpts); ~TextDiagnostic() override; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 829a533ad2692e5..63b3707fbb7ef83 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -3,59 +3,12 @@ #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorOptions.h" #include "llvm/Support/raw_ostream.h" using namespace clang; -void CodeSnippetHighlighter::ensureTokenData() { - if (Initialized) - return; - - // List of keywords, literals and types we want to highlight. - // These are best-effort, as is everything we do wrt. highlighting. - Keywords.insert("_Static_assert"); - Keywords.insert("auto"); - Keywords.insert("concept"); - Keywords.insert("const"); - Keywords.insert("consteval"); - Keywords.insert("constexpr"); - Keywords.insert("delete"); - Keywords.insert("do"); - Keywords.insert("else"); - Keywords.insert("final"); - Keywords.insert("for"); - Keywords.insert("if"); - Keywords.insert("mutable"); - Keywords.insert("namespace"); - Keywords.insert("new"); - Keywords.insert("private"); - Keywords.insert("public"); - Keywords.insert("requires"); - Keywords.insert("return"); - Keywords.insert("static"); - Keywords.insert("static_assert"); - Keywords.insert("using"); - Keywords.insert("void"); - Keywords.insert("volatile"); - Keywords.insert("while"); - - // Builtin types we highlight - Keywords.insert("void"); - Keywords.insert("char"); - Keywords.insert("short"); - Keywords.insert("int"); - Keywords.insert("unsigned"); - Keywords.insert("long"); - Keywords.insert("float"); - Keywords.insert("double"); - - Literals.insert("true"); - Literals.insert("false"); - Literals.insert("nullptr"); - - Initialized = true; -} - static SourceManager createTempSourceManager() { FileSystemOptions FileOpts; FileManager FileMgr(FileOpts); @@ -70,49 +23,51 @@ static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM, return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts); } -std::vector<StyleRange> -CodeSnippetHighlighter::highlightLine(StringRef SourceLine, - const LangOptions &LangOpts) { - ensureTokenData(); - +std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( + StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) { constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW; - const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine); SourceManager FakeSM = createTempSourceManager(); + const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine); Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts); L.SetKeepWhitespaceMode(true); std::vector<StyleRange> Styles; bool Stop = false; while (!Stop) { - Token tok; - Stop = L.LexFromRawLexer(tok); - if (tok.is(tok::unknown)) + Token T; + Stop = L.LexFromRawLexer(T); + if (T.is(tok::unknown)) continue; bool Invalid; unsigned Start = - FakeSM.getSpellingColumnNumber(tok.getLocation(), &Invalid) - 1; + FakeSM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; if (Invalid) continue; - if (tok.is(tok::raw_identifier)) { - // Almost everything we lex is an identifier, since we use a raw lexer. - // Some should be highlightes as literals, others as keywords. - if (Keywords.contains(tok.getRawIdentifier())) - Styles.push_back( - StyleRange{Start, Start + tok.getLength(), KeywordColor}); - else if (Literals.contains(tok.getRawIdentifier())) + if (T.is(tok::raw_identifier)) { + StringRef RawIdent = T.getRawIdentifier(); + // Special case true/false/nullptr literals, since they will otherwise be + // treated as keywords. + if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") { Styles.push_back( - StyleRange{Start, Start + tok.getLength(), LiteralColor}); - } else if (tok::isLiteral(tok.getKind())) { - Styles.push_back( - StyleRange{Start, Start + tok.getLength(), LiteralColor}); - } else if (tok.is(tok::comment)) { - Styles.push_back( - StyleRange{Start, Start + tok.getLength(), CommentColor}); + StyleRange{Start, Start + T.getLength(), LiteralColor}); + } else { + const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); + assert(II); + + if (II->isKeyword(LangOpts)) { + Styles.push_back( + StyleRange{Start, Start + T.getLength(), KeywordColor}); + } + } + } else if (tok::isLiteral(T.getKind())) { + Styles.push_back(StyleRange{Start, Start + T.getLength(), LiteralColor}); + } else if (T.is(tok::comment)) { + Styles.push_back(StyleRange{Start, Start + T.getLength(), CommentColor}); } } diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 13d7d1e048cf991..e840cdd952d09f1 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -645,10 +645,10 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns, return Wrapped; } -TextDiagnostic::TextDiagnostic(raw_ostream &OS, - const LangOptions &LangOpts, +TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, + const Preprocessor *PP, DiagnosticOptions *DiagOpts) - : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS) {} + : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {} TextDiagnostic::~TextDiagnostic() {} @@ -1280,7 +1280,7 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, unsigned LineNo) { std::vector<StyleRange> Styles = - SnippetHighlighter.highlightLine(SourceLine, LangOpts); + SnippetHighlighter.highlightLine(SourceLine, PP, LangOpts); // Emit line number. if (MaxLineNoDisplayWidth > 0) { diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp index 0ff5376098ffe8d..3bc3935078baada 100644 --- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() { void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO, const Preprocessor *PP) { // Build the TextDiagnostic utility. - TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts)); + TextDiag.reset(new TextDiagnostic(OS, LO, PP, &*DiagOpts)); } void TextDiagnosticPrinter::EndSourceFile() { >From 9a6c4ec9a728331acef6dc46c1e3b6551d1fe4c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Wed, 20 Sep 2023 17:24:42 +0200 Subject: [PATCH 03/21] Move the PP parameter to the end of the TextDiagnostic ctor --- clang/include/clang/Frontend/TextDiagnostic.h | 2 +- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 ++ clang/lib/Frontend/TextDiagnostic.cpp | 4 ++-- clang/lib/Frontend/TextDiagnosticPrinter.cpp | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 8cdb9b141a8a4af..43c39ff96a2d1ce 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -38,7 +38,7 @@ class TextDiagnostic : public DiagnosticRenderer { public: TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, - const Preprocessor *PP, DiagnosticOptions *DiagOpts); + DiagnosticOptions *DiagOpts, const Preprocessor *PP = nullptr); ~TextDiagnostic() override; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 63b3707fbb7ef83..32bd61f3746023c 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -25,6 +25,8 @@ static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM, std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) { + if (!PP) + return {}; constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW; diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index e840cdd952d09f1..c9207e9dfbf9217 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -646,8 +646,8 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns, } TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, - const Preprocessor *PP, - DiagnosticOptions *DiagOpts) + DiagnosticOptions *DiagOpts, + const Preprocessor *PP) : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {} TextDiagnostic::~TextDiagnostic() {} diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp index 3bc3935078baada..b2fb762537573ef 100644 --- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() { void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO, const Preprocessor *PP) { // Build the TextDiagnostic utility. - TextDiag.reset(new TextDiagnostic(OS, LO, PP, &*DiagOpts)); + TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts, PP)); } void TextDiagnosticPrinter::EndSourceFile() { >From 7fe69bd8b5d42449955da5447b0e830f6de0f2e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Thu, 21 Sep 2023 06:38:24 +0200 Subject: [PATCH 04/21] Tune colors --- clang/include/clang/Frontend/CodeSnippetHighlighter.h | 2 +- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 6 +++--- clang/lib/Frontend/TextDiagnostic.cpp | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index ec03375221f9ffc..c2a0184085d5da4 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -19,7 +19,7 @@ namespace clang { struct StyleRange { unsigned Start; unsigned End; - const enum llvm::raw_ostream::Colors c; + const enum llvm::raw_ostream::Colors color; }; class Preprocessor; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 32bd61f3746023c..dba7f5d2848505a 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -27,9 +27,9 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) { if (!PP) return {}; - constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; - constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; - constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW; + constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN; + constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN; + constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; SourceManager FakeSM = createTempSourceManager(); const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine); diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index c9207e9dfbf9217..35a92a8044f2e52 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Locale.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <optional> @@ -1307,14 +1308,14 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, std::optional<enum raw_ostream::Colors> H; for (auto &P : Styles) { if (P.Start < I && P.End >= I) { - H = P.c; + H = P.color; break; } } - if (H) { + if (H) OS.changeColor(*H, false); - } else + else OS.resetColor(); } >From 8ac87ae8e97245d7de20fc1ac8a5a895469d89f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Thu, 21 Sep 2023 11:01:43 +0200 Subject: [PATCH 05/21] Lex the entire file --- .../clang/Frontend/CodeSnippetHighlighter.h | 7 +- clang/include/clang/Frontend/TextDiagnostic.h | 3 +- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 138 ++++++++++++------ clang/lib/Frontend/TextDiagnostic.cpp | 14 +- 4 files changed, 111 insertions(+), 51 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index c2a0184085d5da4..51c14880fb95485 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -23,6 +23,8 @@ struct StyleRange { }; class Preprocessor; +class FileID; +class SourceManager; class CodeSnippetHighlighter final { public: @@ -31,9 +33,10 @@ class CodeSnippetHighlighter final { /// Produce StyleRanges for the given line. /// The returned vector contains non-overlapping style ranges. They are sorted /// from beginning of the line to the end. - std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine, + std::vector<StyleRange> highlightLine(unsigned LineNumber, const Preprocessor *PP, - const LangOptions &LangOpts); + const LangOptions &LangOpts, FileID FID, + const SourceManager &SM); }; } // namespace clang diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 43c39ff96a2d1ce..102b33aedd5ef98 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -105,7 +105,8 @@ class TextDiagnostic : public DiagnosticRenderer { ArrayRef<FixItHint> Hints); void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, - unsigned LineNo); + FileID FID, const SourceManager &SM, unsigned LineNo, + unsigned DisplayLineNo); void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM); }; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index dba7f5d2848505a..d319e690a355b2b 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -9,34 +9,47 @@ using namespace clang; -static SourceManager createTempSourceManager() { - FileSystemOptions FileOpts; - FileManager FileMgr(FileOpts); - llvm::IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs()); - llvm::IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions()); - DiagnosticsEngine diags(DiagIDs, DiagOpts); - return SourceManager(diags, FileMgr); -} - -static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM, - const LangOptions &LangOpts) { - return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts); -} +static constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN; +static constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN; +static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( - StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) { + unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts, + FileID FID, const SourceManager &SM) { if (!PP) return {}; - constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN; - constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN; - constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; - SourceManager FakeSM = createTempSourceManager(); - const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine); - Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts); + // Classify the given token and append it to the given vector. + auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec, + const Token &T, unsigned Start, + unsigned Length) -> void { + if (T.is(tok::raw_identifier)) { + StringRef RawIdent = T.getRawIdentifier(); + // Special case true/false/nullptr literals, since they will otherwise be + // treated as keywords. + if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") { + Vec.push_back(StyleRange{Start, Start + Length, LiteralColor}); + } else { + const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); + assert(II); + + if (II->isKeyword(LangOpts)) { + Vec.push_back(StyleRange{Start, Start + Length, KeywordColor}); + } + } + } else if (tok::isLiteral(T.getKind())) { + Vec.push_back(StyleRange{Start, Start + Length, LiteralColor}); + } else if (T.is(tok::comment)) { + Vec.push_back(StyleRange{Start, Start + Length, CommentColor}); + } + }; + + auto Buff = SM.getBufferOrNone(FID); + assert(Buff); + Lexer L = Lexer(FID, *Buff, SM, LangOpts); L.SetKeepWhitespaceMode(true); + std::vector<std::vector<StyleRange>> Lines; - std::vector<StyleRange> Styles; bool Stop = false; while (!Stop) { Token T; @@ -45,33 +58,74 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( continue; bool Invalid; - unsigned Start = - FakeSM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; + unsigned StartCol = + SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; + if (Invalid) + continue; + unsigned StartLine = + SM.getSpellingLineNumber(T.getLocation(), &Invalid) - 1; if (Invalid) continue; - if (T.is(tok::raw_identifier)) { - StringRef RawIdent = T.getRawIdentifier(); - // Special case true/false/nullptr literals, since they will otherwise be - // treated as keywords. - if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") { - Styles.push_back( - StyleRange{Start, Start + T.getLength(), LiteralColor}); - } else { - const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); - assert(II); + while (Lines.size() <= StartLine) + Lines.push_back({}); - if (II->isKeyword(LangOpts)) { - Styles.push_back( - StyleRange{Start, Start + T.getLength(), KeywordColor}); - } + unsigned EndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1; + if (Invalid) + continue; + + // Simple tokens. + if (StartLine == EndLine) { + appendStyle(Lines[StartLine], T, StartCol, T.getLength()); + continue; + } + unsigned NumLines = EndLine - StartLine; + + // For tokens that span multiple lines (think multiline comments), we + // divide them into multiple StyleRanges. + unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1; + if (Invalid) + continue; + + std::string Spelling = Lexer::getSpelling(T, SM, LangOpts); + + unsigned L = 0; + unsigned LineLength = 0; + for (unsigned I = 0; I <= Spelling.size(); ++I) { + // This line is done. + if (Spelling[I] == '\n' || Spelling[I] == '\r' || I == Spelling.size()) { + while (Lines.size() <= StartLine + L) + Lines.push_back({}); + + if (L == 0) // First line + appendStyle(Lines[StartLine + L], T, StartCol, LineLength); + else if (L == NumLines) // Last line + appendStyle(Lines[StartLine + L], T, 0, EndCol); + else + appendStyle(Lines[StartLine + L], T, 0, LineLength); + ++L; + LineLength = 0; + continue; } - } else if (tok::isLiteral(T.getKind())) { - Styles.push_back(StyleRange{Start, Start + T.getLength(), LiteralColor}); - } else if (T.is(tok::comment)) { - Styles.push_back(StyleRange{Start, Start + T.getLength(), CommentColor}); + ++LineLength; + } + } + +#if 0 + llvm::errs() << "--\nLine Style info: \n"; + int I = 0; + for (std::vector<StyleRange> &Line : Lines) { + llvm::errs() << I << ": "; + for (const auto &R : Line) { + llvm::errs() << "{" << R.Start << ", " << R.End << "}, "; } + llvm::errs() << "\n"; + + ++I; } +#endif - return Styles; + while (Lines.size() <= LineNumber) + Lines.push_back({}); + return Lines[LineNumber]; } diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 35a92a8044f2e52..5aea7b8f4210749 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -1249,7 +1249,8 @@ void TextDiagnostic::emitSnippetAndCaret( } // Emit what we have computed. - emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo); + emitSnippet(SourceLine, MaxLineNoDisplayWidth, FID, SM, LineNo, + DisplayLineNo); if (!CaretLine.empty()) { indentForLineNumbers(); @@ -1278,16 +1279,17 @@ void TextDiagnostic::emitSnippetAndCaret( } void TextDiagnostic::emitSnippet(StringRef SourceLine, - unsigned MaxLineNoDisplayWidth, - unsigned LineNo) { + unsigned MaxLineNoDisplayWidth, FileID FID, + const SourceManager &SM, unsigned LineNo, + unsigned DisplayLineNo) { std::vector<StyleRange> Styles = - SnippetHighlighter.highlightLine(SourceLine, PP, LangOpts); + SnippetHighlighter.highlightLine(LineNo - 1, PP, LangOpts, FID, SM); // Emit line number. if (MaxLineNoDisplayWidth > 0) { - unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo); + unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo); OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1) - << LineNo << " | "; + << DisplayLineNo << " | "; } // Print the source line one character at a time. >From 9c3352e2ac54693d1431a73f892002701c9fb122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 22 Sep 2023 06:48:55 +0200 Subject: [PATCH 06/21] Try to fix PCH test --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index d319e690a355b2b..8905fbfb29b8927 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -19,6 +19,10 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( if (!PP) return {}; + // Might cause emission of another diagnostic. + if (PP->getIdentifierTable().getExternalIdentifierLookup()) + return {}; + // Classify the given token and append it to the given vector. auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec, const Token &T, unsigned Start, >From 3b6df018b76ab6f6c0f6e822310b6fe2dafe6d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Tue, 26 Sep 2023 08:11:58 +0200 Subject: [PATCH 07/21] Measurements --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 8905fbfb29b8927..17614a962ee6a7f 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -6,6 +6,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" #include "llvm/Support/raw_ostream.h" +#include <chrono> using namespace clang; @@ -16,6 +17,9 @@ static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts, FileID FID, const SourceManager &SM) { + std::chrono::steady_clock::time_point begin = + std::chrono::steady_clock::now(); + if (!PP) return {}; @@ -23,6 +27,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( if (PP->getIdentifierTable().getExternalIdentifierLookup()) return {}; + size_t NTokens = 0; // Classify the given token and append it to the given vector. auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec, const Token &T, unsigned Start, @@ -56,6 +61,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( bool Stop = false; while (!Stop) { + ++NTokens; Token T; Stop = L.LexFromRawLexer(T); if (T.is(tok::unknown)) @@ -131,5 +137,23 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( while (Lines.size() <= LineNumber) Lines.push_back({}); + + std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); + llvm::errs() << "Lexed " << Lines.size() << " lines and " << NTokens + << " Tokens\n"; + llvm::errs() << "That took " + << std::chrono::duration_cast<std::chrono::microseconds>(end - + begin) + .count() + << " microseconds\n"; + llvm::errs() << "That took " + << std::chrono::duration_cast<std::chrono::milliseconds>(end - + begin) + .count() + << " milliseconds\n"; + llvm::errs() + << "That took " + << std::chrono::duration_cast<std::chrono::seconds>(end - begin).count() + << " seconds\n"; return Lines[LineNumber]; } >From 871ad4332a683d5de11dfd20df10f6a5ac7d9a3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Wed, 27 Sep 2023 08:05:09 +0200 Subject: [PATCH 08/21] Slightly improve performance by bailing out earlier --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 17614a962ee6a7f..71e5c30e56d146a 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -41,14 +41,13 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( } else { const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); assert(II); - - if (II->isKeyword(LangOpts)) { + if (II->isKeyword(LangOpts)) Vec.push_back(StyleRange{Start, Start + Length, KeywordColor}); - } } } else if (tok::isLiteral(T.getKind())) { Vec.push_back(StyleRange{Start, Start + Length, LiteralColor}); - } else if (T.is(tok::comment)) { + } else { + assert(T.is(tok::comment)); Vec.push_back(StyleRange{Start, Start + Length, CommentColor}); } }; @@ -67,6 +66,11 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( if (T.is(tok::unknown)) continue; + // We are only interested in identifiers, literals and comments. + if (!T.is(tok::raw_identifier) && !T.is(tok::comment) && + !tok::isLiteral(T.getKind())) + continue; + bool Invalid; unsigned StartCol = SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; @@ -138,6 +142,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( while (Lines.size() <= LineNumber) Lines.push_back({}); +#if 0 std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); llvm::errs() << "Lexed " << Lines.size() << " lines and " << NTokens << " Tokens\n"; @@ -155,5 +160,6 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( << "That took " << std::chrono::duration_cast<std::chrono::seconds>(end - begin).count() << " seconds\n"; +#endif return Lines[LineNumber]; } >From 1a872fa6faaf81e2e68a7b465239afcd757fe6f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Wed, 27 Sep 2023 10:45:36 +0200 Subject: [PATCH 09/21] Only care about tokens that touch our LineNumber. --- .../clang/Frontend/CodeSnippetHighlighter.h | 2 +- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 19 +++++++++++++------ clang/lib/Frontend/TextDiagnostic.cpp | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index 51c14880fb95485..a65bd3991d4eff2 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -19,7 +19,7 @@ namespace clang { struct StyleRange { unsigned Start; unsigned End; - const enum llvm::raw_ostream::Colors color; + const enum llvm::raw_ostream::Colors Color; }; class Preprocessor; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 71e5c30e56d146a..7663155c6c83923 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -71,23 +71,30 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( !tok::isLiteral(T.getKind())) continue; - bool Invalid; - unsigned StartCol = - SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; + bool Invalid = false; + unsigned EndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1; if (Invalid) continue; + + if (EndLine < LineNumber) + continue; unsigned StartLine = SM.getSpellingLineNumber(T.getLocation(), &Invalid) - 1; if (Invalid) continue; + if (StartLine > LineNumber) + break; - while (Lines.size() <= StartLine) - Lines.push_back({}); + // Must have an intersection at this point + assert(StartLine <= LineNumber && EndLine >= LineNumber); - unsigned EndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1; + unsigned StartCol = + SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; if (Invalid) continue; + while (Lines.size() <= StartLine) + Lines.push_back({}); // Simple tokens. if (StartLine == EndLine) { appendStyle(Lines[StartLine], T, StartCol, T.getLength()); diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 5aea7b8f4210749..f2793d23522f1a5 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -1310,7 +1310,7 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, std::optional<enum raw_ostream::Colors> H; for (auto &P : Styles) { if (P.Start < I && P.End >= I) { - H = P.color; + H = P.Color; break; } } >From cbbc1647cd233e3d0a5675e07c78ab27f4c055fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 6 Oct 2023 15:28:25 +0200 Subject: [PATCH 10/21] Add checkpoints to Preprocessor --- .../clang/Frontend/CodeSnippetHighlighter.h | 13 ++-- clang/include/clang/Frontend/TextDiagnostic.h | 2 +- clang/include/clang/Lex/Preprocessor.h | 5 ++ clang/lib/Frontend/CodeSnippetHighlighter.cpp | 64 +++++++++++-------- clang/lib/Frontend/TextDiagnostic.cpp | 10 +-- clang/lib/Lex/Preprocessor.cpp | 27 ++++++++ 6 files changed, 81 insertions(+), 40 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index a65bd3991d4eff2..451a182b3e35317 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -10,16 +10,15 @@ #define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H #include "clang/Basic/LangOptions.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/raw_ostream.h" -#include <vector> namespace clang { struct StyleRange { unsigned Start; unsigned End; - const enum llvm::raw_ostream::Colors Color; + enum llvm::raw_ostream::Colors Color; }; class Preprocessor; @@ -33,10 +32,10 @@ class CodeSnippetHighlighter final { /// Produce StyleRanges for the given line. /// The returned vector contains non-overlapping style ranges. They are sorted /// from beginning of the line to the end. - std::vector<StyleRange> highlightLine(unsigned LineNumber, - const Preprocessor *PP, - const LangOptions &LangOpts, FileID FID, - const SourceManager &SM); + llvm::SmallVector<StyleRange> + highlightLine(unsigned LineNumber, const Preprocessor *PP, + const LangOptions &LangOpts, FileID FID, + const SourceManager &SM, const char *LineStart); }; } // namespace clang diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 102b33aedd5ef98..ecd5bb4a4f568dc 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -106,7 +106,7 @@ class TextDiagnostic : public DiagnosticRenderer { void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, FileID FID, const SourceManager &SM, unsigned LineNo, - unsigned DisplayLineNo); + unsigned DisplayLineNo, const char *LineStart); void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM); }; diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 4ec21a8b6be2c85..07c44794520f667 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -128,6 +128,7 @@ enum MacroUse { class Preprocessor { friend class VAOptDefinitionContext; friend class VariadicMacroScopeGuard; + friend class CodeSnippetHighlighter; llvm::unique_function<void(const clang::Token &)> OnToken; std::shared_ptr<PreprocessorOptions> PPOpts; @@ -141,6 +142,10 @@ class Preprocessor { HeaderSearch &HeaderInfo; ModuleLoader &TheModuleLoader; + llvm::SmallVector<const char *> CheckPoints; + void saveCheckPoint(const char *P); + const char *getSaveFor(const char *S) const; + /// External source of macros. ExternalPreprocessorSource *ExternalSource; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 7663155c6c83923..28b66d4c05b8c8b 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -14,9 +14,9 @@ static constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN; static constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN; static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; -std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( +llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts, - FileID FID, const SourceManager &SM) { + FileID FID, const SourceManager &SM, const char *LineStart) { std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); @@ -29,7 +29,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( size_t NTokens = 0; // Classify the given token and append it to the given vector. - auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec, + auto appendStyle = [PP, &LangOpts](llvm::SmallVector<StyleRange> &Vec, const Token &T, unsigned Start, unsigned Length) -> void { if (T.is(tok::raw_identifier)) { @@ -52,12 +52,23 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( } }; + // Figure out where to start lexing from. auto Buff = SM.getBufferOrNone(FID); assert(Buff); Lexer L = Lexer(FID, *Buff, SM, LangOpts); L.SetKeepWhitespaceMode(true); - std::vector<std::vector<StyleRange>> Lines; + // Seek to the last save point before the start of the line. + if (const char *Save = PP->getSaveFor(LineStart); + Buff->getBufferStart() <= Save && Save < Buff->getBufferEnd()) { + size_t Offset = Save - Buff->getBufferStart(); + assert(Save >= Buff->getBufferStart()); + assert(Save <= Buff->getBufferEnd()); + + L.seek(Offset, /*IsAtStartOfLine=*/true); + } + + llvm::SmallVector<StyleRange> LineRanges; bool Stop = false; while (!Stop) { ++NTokens; @@ -93,14 +104,13 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( if (Invalid) continue; - while (Lines.size() <= StartLine) - Lines.push_back({}); // Simple tokens. if (StartLine == EndLine) { - appendStyle(Lines[StartLine], T, StartCol, T.getLength()); + appendStyle(LineRanges, T, StartCol, T.getLength()); continue; } unsigned NumLines = EndLine - StartLine; + assert(NumLines >= 1); // For tokens that span multiple lines (think multiline comments), we // divide them into multiple StyleRanges. @@ -115,15 +125,17 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( for (unsigned I = 0; I <= Spelling.size(); ++I) { // This line is done. if (Spelling[I] == '\n' || Spelling[I] == '\r' || I == Spelling.size()) { - while (Lines.size() <= StartLine + L) - Lines.push_back({}); - - if (L == 0) // First line - appendStyle(Lines[StartLine + L], T, StartCol, LineLength); - else if (L == NumLines) // Last line - appendStyle(Lines[StartLine + L], T, 0, EndCol); - else - appendStyle(Lines[StartLine + L], T, 0, LineLength); + if (StartLine + L == LineNumber) { + if (L == 0) // First line + appendStyle(LineRanges, T, StartCol, LineLength); + else if (L == NumLines) // Last line + appendStyle(LineRanges, T, 0, EndCol); + else + appendStyle(LineRanges, T, 0, LineLength); + + // We only do one line, so we're done. + break; + } ++L; LineLength = 0; continue; @@ -134,25 +146,21 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( #if 0 llvm::errs() << "--\nLine Style info: \n"; - int I = 0; - for (std::vector<StyleRange> &Line : Lines) { - llvm::errs() << I << ": "; - for (const auto &R : Line) { + //int I = 0; + //for (std::vector<StyleRange> &Line : Lines) { + //llvm::errs() << I << ": "; + for (const auto &R : LineRanges) { llvm::errs() << "{" << R.Start << ", " << R.End << "}, "; } llvm::errs() << "\n"; - ++I; - } + //++I; + //} #endif - while (Lines.size() <= LineNumber) - Lines.push_back({}); - #if 0 std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); - llvm::errs() << "Lexed " << Lines.size() << " lines and " << NTokens - << " Tokens\n"; + llvm::errs() << "Lexed " << NTokens << " Tokens\n"; llvm::errs() << "That took " << std::chrono::duration_cast<std::chrono::microseconds>(end - begin) @@ -168,5 +176,5 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( << std::chrono::duration_cast<std::chrono::seconds>(end - begin).count() << " seconds\n"; #endif - return Lines[LineNumber]; + return LineRanges; } diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index f2793d23522f1a5..cbc0cfacec20f0e 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -13,6 +13,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Frontend/CodeSnippetHighlighter.h" #include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ConvertUTF.h" @@ -1250,7 +1251,7 @@ void TextDiagnostic::emitSnippetAndCaret( // Emit what we have computed. emitSnippet(SourceLine, MaxLineNoDisplayWidth, FID, SM, LineNo, - DisplayLineNo); + DisplayLineNo, LineStart); if (!CaretLine.empty()) { indentForLineNumbers(); @@ -1281,9 +1282,10 @@ void TextDiagnostic::emitSnippetAndCaret( void TextDiagnostic::emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, FileID FID, const SourceManager &SM, unsigned LineNo, - unsigned DisplayLineNo) { - std::vector<StyleRange> Styles = - SnippetHighlighter.highlightLine(LineNo - 1, PP, LangOpts, FID, SM); + unsigned DisplayLineNo, + const char *LineStart) { + llvm::SmallVector<StyleRange> Styles = SnippetHighlighter.highlightLine( + LineNo - 1, PP, LangOpts, FID, SM, LineStart); // Emit line number. if (MaxLineNoDisplayWidth > 0) { diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 64f54c6fc6382f2..d865326bcfa6dda 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -546,6 +546,7 @@ void Preprocessor::EnterMainSourceFile() { // information) and predefined macros aren't guaranteed to be set properly. assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); FileID MainFileID = SourceMgr.getMainFileID(); + // llvm::errs() << "##### Main source file: " << (int)MainFileID << "\n"; // If MainFileID is loaded it means we loaded an AST file, no need to enter // a main file. @@ -862,6 +863,32 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { return true; } +void Preprocessor::saveCheckPoint(const char *P) { + static constexpr ptrdiff_t Limit = 1000; + if (CheckPoints.empty()) { + CheckPoints.push_back(P); + return; + } + + const char *Cur = CheckPoints.back(); + if (Cur == P) + return; + if ((P - Cur) > Limit) + CheckPoints.push_back(P); +} + +const char *Preprocessor::getSaveFor(const char *S) const { + const char *C = S; + // FIXME: Use std::lower_bound or something smart. Aaron knows what I'm + // talking about. + for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) { + C = CheckPoints[I]; + if (CheckPoints[I] <= S) + break; + } + return C; +} + void Preprocessor::Lex(Token &Result) { ++LexLevel; >From d90addbc705aa2d73cd5ef62f4e36d86fdb2975b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Sat, 7 Oct 2023 12:35:21 +0200 Subject: [PATCH 11/21] Add missing license header --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 28b66d4c05b8c8b..042745fc639ec39 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -1,3 +1,10 @@ +//===-- CodeSnippetHighlighter.cpp - Code snippet highlighting --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// #include "clang/Frontend/CodeSnippetHighlighter.h" #include "clang/Basic/DiagnosticOptions.h" >From 34454b1301f215c0909240ffa159b3c0ec19b8a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Tue, 10 Oct 2023 14:04:43 +0200 Subject: [PATCH 12/21] Fewer checkpoints --- clang/lib/Lex/Preprocessor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index d865326bcfa6dda..f5366d04a09e5ad 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -864,7 +864,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { } void Preprocessor::saveCheckPoint(const char *P) { - static constexpr ptrdiff_t Limit = 1000; + static constexpr ptrdiff_t Limit = 1024 * 8; if (CheckPoints.empty()) { CheckPoints.push_back(P); return; >From 8a52203291547eabc467e2015a55ae787cc44e92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 13 Oct 2023 11:11:06 +0200 Subject: [PATCH 13/21] Cleanup --- clang/lib/Lex/Preprocessor.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index f5366d04a09e5ad..bc57cd663d04fd8 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -546,7 +546,6 @@ void Preprocessor::EnterMainSourceFile() { // information) and predefined macros aren't guaranteed to be set properly. assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); FileID MainFileID = SourceMgr.getMainFileID(); - // llvm::errs() << "##### Main source file: " << (int)MainFileID << "\n"; // If MainFileID is loaded it means we loaded an AST file, no need to enter // a main file. @@ -878,15 +877,8 @@ void Preprocessor::saveCheckPoint(const char *P) { } const char *Preprocessor::getSaveFor(const char *S) const { - const char *C = S; - // FIXME: Use std::lower_bound or something smart. Aaron knows what I'm - // talking about. - for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) { - C = CheckPoints[I]; - if (CheckPoints[I] <= S) - break; - } - return C; + auto It = llvm::lower_bound(CheckPoints, S, std::less<const char *>()); + return *It; } void Preprocessor::Lex(Token &Result) { >From 9ae040ce75f74965a45f25e7c0d1dcaf349f112e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Sun, 15 Oct 2023 17:00:36 +0200 Subject: [PATCH 14/21] Address some review comments --- clang/include/clang/Frontend/CodeSnippetHighlighter.h | 2 ++ clang/lib/Frontend/CodeSnippetHighlighter.cpp | 9 +++++---- clang/lib/Frontend/TextDiagnostic.cpp | 1 - 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index 451a182b3e35317..cb3c96f69293795 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -19,6 +19,8 @@ struct StyleRange { unsigned Start; unsigned End; enum llvm::raw_ostream::Colors Color; + StyleRange(unsigned S, unsigned E, enum llvm::raw_ostream::Colors C) + : Start(S), End(E), Color(C){}; }; class Preprocessor; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 042745fc639ec39..30c4c791cb4f824 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Frontend/CodeSnippetHighlighter.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" @@ -49,13 +50,13 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); assert(II); if (II->isKeyword(LangOpts)) - Vec.push_back(StyleRange{Start, Start + Length, KeywordColor}); + Vec.emplace_back(Start, Start + Length, KeywordColor); } } else if (tok::isLiteral(T.getKind())) { - Vec.push_back(StyleRange{Start, Start + Length, LiteralColor}); + Vec.emplace_back(Start, Start + Length, LiteralColor); } else { assert(T.is(tok::comment)); - Vec.push_back(StyleRange{Start, Start + Length, CommentColor}); + Vec.emplace_back(Start, Start + Length, CommentColor); } }; @@ -131,7 +132,7 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( unsigned LineLength = 0; for (unsigned I = 0; I <= Spelling.size(); ++I) { // This line is done. - if (Spelling[I] == '\n' || Spelling[I] == '\r' || I == Spelling.size()) { + if (isVerticalWhitespace(Spelling[I]) || I == Spelling.size()) { if (StartLine + L == LineNumber) { if (L == 0) // First line appendStyle(LineRanges, T, StartCol, LineLength); diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index cbc0cfacec20f0e..033d21656b12724 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -20,7 +20,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Locale.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <optional> >From cf372fb0944bd8cb1f3ee113e232fe248d22db20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Mon, 16 Oct 2023 07:21:41 +0200 Subject: [PATCH 15/21] Fix highlighting and add another assertion --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 1 + clang/lib/Lex/Preprocessor.cpp | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 30c4c791cb4f824..7a3fdc1e0d16ea8 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -72,6 +72,7 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( size_t Offset = Save - Buff->getBufferStart(); assert(Save >= Buff->getBufferStart()); assert(Save <= Buff->getBufferEnd()); + assert(Save <= LineStart); L.seek(Offset, /*IsAtStartOfLine=*/true); } diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index bc57cd663d04fd8..bc8302d7fb9489a 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -876,9 +876,19 @@ void Preprocessor::saveCheckPoint(const char *P) { CheckPoints.push_back(P); } +/// We want to always return a value lower than \p S. +/// If there is no such checkpoint, return nullptr. const char *Preprocessor::getSaveFor(const char *S) const { - auto It = llvm::lower_bound(CheckPoints, S, std::less<const char *>()); - return *It; + const char *Result = nullptr; + for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) { + const char *C = CheckPoints[I]; + if (C <= S) { + Result = C; + break; + } + } + + return Result; } void Preprocessor::Lex(Token &Result) { >From dab2d9afd1488b525399b1f76301d5f0782dbc28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Mon, 16 Oct 2023 07:51:10 +0200 Subject: [PATCH 16/21] Change colors one last time To match those used in LLDB --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 7a3fdc1e0d16ea8..316a151c9943c90 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -18,9 +18,9 @@ using namespace clang; -static constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN; -static constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN; -static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; +static constexpr raw_ostream::Colors CommentColor = raw_ostream::MAGENTA; +static constexpr raw_ostream::Colors LiteralColor = raw_ostream::RED; +static constexpr raw_ostream::Colors KeywordColor = raw_ostream::GREEN; llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts, @@ -45,7 +45,7 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( // Special case true/false/nullptr literals, since they will otherwise be // treated as keywords. if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") { - Vec.push_back(StyleRange{Start, Start + Length, LiteralColor}); + Vec.emplace_back(Start, Start + Length, LiteralColor); } else { const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); assert(II); >From 696138896feae4a8aa6bbf5350bb94e051e01fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Tue, 17 Oct 2023 06:13:12 +0200 Subject: [PATCH 17/21] Address review comments --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 +- clang/lib/Frontend/TextDiagnostic.cpp | 15 +++++---------- clang/lib/Lex/Preprocessor.cpp | 16 ++++++---------- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 316a151c9943c90..715c113d519438d 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -20,7 +20,7 @@ using namespace clang; static constexpr raw_ostream::Colors CommentColor = raw_ostream::MAGENTA; static constexpr raw_ostream::Colors LiteralColor = raw_ostream::RED; -static constexpr raw_ostream::Colors KeywordColor = raw_ostream::GREEN; +static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts, diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 033d21656b12724..1378b3a0812e598 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -1306,18 +1306,13 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, if (!WasPrintable) HighlightingEnabled = false; - // FIXME: I hope we can do this in some nicer way. if (HighlightingEnabled) { - std::optional<enum raw_ostream::Colors> H; - for (auto &P : Styles) { - if (P.Start < I && P.End >= I) { - H = P.Color; - break; - } - } + const auto *CharStyle = llvm::find_if(Styles, [I](const StyleRange &R) { + return (R.Start < I && R.End >= I); + }); - if (H) - OS.changeColor(*H, false); + if (CharStyle != Styles.end()) + OS.changeColor(CharStyle->Color, false); else OS.resetColor(); } diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index bc8302d7fb9489a..c7ee33e7b31bd06 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -163,6 +163,8 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, PreambleConditionalStack.startRecording(); MaxTokens = LangOpts.MaxTokens; + + CheckPoints.push_back(nullptr); } Preprocessor::~Preprocessor() { @@ -862,17 +864,11 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { return true; } +static constexpr ptrdiff_t CheckPointLimit = 1024 * 8; void Preprocessor::saveCheckPoint(const char *P) { - static constexpr ptrdiff_t Limit = 1024 * 8; - if (CheckPoints.empty()) { - CheckPoints.push_back(P); - return; - } - - const char *Cur = CheckPoints.back(); - if (Cur == P) - return; - if ((P - Cur) > Limit) + assert(!CheckPoints.empty()); + assert(CheckPoints.back() != P); + if ((P - CheckPoints.back()) > CheckPointLimit) CheckPoints.push_back(P); } >From 628edb13197891ab3530cc529890576f07c69cfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Tue, 17 Oct 2023 07:44:11 +0200 Subject: [PATCH 18/21] Rename lexer API --- clang/include/clang/Lex/Preprocessor.h | 9 ++++++--- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 +- clang/lib/Lex/Preprocessor.cpp | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 07c44794520f667..05bf87d584b8db4 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -141,10 +141,7 @@ class Preprocessor { std::unique_ptr<ScratchBuffer> ScratchBuf; HeaderSearch &HeaderInfo; ModuleLoader &TheModuleLoader; - llvm::SmallVector<const char *> CheckPoints; - void saveCheckPoint(const char *P); - const char *getSaveFor(const char *S) const; /// External source of macros. ExternalPreprocessorSource *ExternalSource; @@ -1323,6 +1320,11 @@ class Preprocessor { OnToken = std::move(F); } + /// Returns a pointer into the main file's buffer that's guaranteed to be + /// after a fully lexed token. This can be used to partially lex a file + /// without starting in the middle of a token. + const char *getCompleteTokenCheckpoint(const char *P) const; + void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; } bool isMacroDefined(StringRef Id) { @@ -2263,6 +2265,7 @@ class Preprocessor { const char *getCurLexerEndPos(); void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod); + void saveCheckPoint(const char *P); public: void PoisonSEHIdentifiers(bool Poison = true); // Borland diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 715c113d519438d..1292469f80a0734 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -67,7 +67,7 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( L.SetKeepWhitespaceMode(true); // Seek to the last save point before the start of the line. - if (const char *Save = PP->getSaveFor(LineStart); + if (const char *Save = PP->getCompleteTokenCheckpoint(LineStart); Buff->getBufferStart() <= Save && Save < Buff->getBufferEnd()) { size_t Offset = Save - Buff->getBufferStart(); assert(Save >= Buff->getBufferStart()); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index c7ee33e7b31bd06..7dc96e686d0619d 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -874,11 +874,11 @@ void Preprocessor::saveCheckPoint(const char *P) { /// We want to always return a value lower than \p S. /// If there is no such checkpoint, return nullptr. -const char *Preprocessor::getSaveFor(const char *S) const { +const char *Preprocessor::getCompleteTokenCheckpoint(const char *P) const { const char *Result = nullptr; for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) { const char *C = CheckPoints[I]; - if (C <= S) { + if (C <= P) { Result = C; break; } >From e6bc81ba6f877cb0784c064cce97131f22ee3f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 20 Oct 2023 09:43:46 +0200 Subject: [PATCH 19/21] Just don't highlight in files >1MB --- clang/include/clang/Lex/Preprocessor.h | 7 ---- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 34 ++++++++----------- clang/lib/Lex/Preprocessor.cpp | 25 -------------- 3 files changed, 15 insertions(+), 51 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 05bf87d584b8db4..b1c2807e35a3149 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -141,7 +141,6 @@ class Preprocessor { std::unique_ptr<ScratchBuffer> ScratchBuf; HeaderSearch &HeaderInfo; ModuleLoader &TheModuleLoader; - llvm::SmallVector<const char *> CheckPoints; /// External source of macros. ExternalPreprocessorSource *ExternalSource; @@ -1320,11 +1319,6 @@ class Preprocessor { OnToken = std::move(F); } - /// Returns a pointer into the main file's buffer that's guaranteed to be - /// after a fully lexed token. This can be used to partially lex a file - /// without starting in the middle of a token. - const char *getCompleteTokenCheckpoint(const char *P) const; - void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; } bool isMacroDefined(StringRef Id) { @@ -2265,7 +2259,6 @@ class Preprocessor { const char *getCurLexerEndPos(); void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod); - void saveCheckPoint(const char *P); public: void PoisonSEHIdentifiers(bool Poison = true); // Borland diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 1292469f80a0734..a1ca68227323469 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -18,9 +18,15 @@ using namespace clang; -static constexpr raw_ostream::Colors CommentColor = raw_ostream::MAGENTA; -static constexpr raw_ostream::Colors LiteralColor = raw_ostream::RED; +// Magenta is taken for 'warning'. Red is already 'error' and 'cya' +// is already taken for 'note'. Green is already used to underline +// source ranges. White and black are bad because of the usual +// terminal backgrounds. Which leaves us only with TWO options. +static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW; +static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; +/// Maximum size of file we still highlight. +static constexpr size_t MaxBufferSize = 1024 * 1024; // 1MB. llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts, @@ -35,6 +41,13 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( if (PP->getIdentifierTable().getExternalIdentifierLookup()) return {}; + auto Buff = SM.getBufferOrNone(FID); + if (!Buff || Buff->getBufferSize() > MaxBufferSize) + return {}; + + Lexer L = Lexer(FID, *Buff, SM, LangOpts); + L.SetKeepWhitespaceMode(true); + size_t NTokens = 0; // Classify the given token and append it to the given vector. auto appendStyle = [PP, &LangOpts](llvm::SmallVector<StyleRange> &Vec, @@ -60,23 +73,6 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( } }; - // Figure out where to start lexing from. - auto Buff = SM.getBufferOrNone(FID); - assert(Buff); - Lexer L = Lexer(FID, *Buff, SM, LangOpts); - L.SetKeepWhitespaceMode(true); - - // Seek to the last save point before the start of the line. - if (const char *Save = PP->getCompleteTokenCheckpoint(LineStart); - Buff->getBufferStart() <= Save && Save < Buff->getBufferEnd()) { - size_t Offset = Save - Buff->getBufferStart(); - assert(Save >= Buff->getBufferStart()); - assert(Save <= Buff->getBufferEnd()); - assert(Save <= LineStart); - - L.seek(Offset, /*IsAtStartOfLine=*/true); - } - llvm::SmallVector<StyleRange> LineRanges; bool Stop = false; while (!Stop) { diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 7dc96e686d0619d..64f54c6fc6382f2 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -163,8 +163,6 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, PreambleConditionalStack.startRecording(); MaxTokens = LangOpts.MaxTokens; - - CheckPoints.push_back(nullptr); } Preprocessor::~Preprocessor() { @@ -864,29 +862,6 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { return true; } -static constexpr ptrdiff_t CheckPointLimit = 1024 * 8; -void Preprocessor::saveCheckPoint(const char *P) { - assert(!CheckPoints.empty()); - assert(CheckPoints.back() != P); - if ((P - CheckPoints.back()) > CheckPointLimit) - CheckPoints.push_back(P); -} - -/// We want to always return a value lower than \p S. -/// If there is no such checkpoint, return nullptr. -const char *Preprocessor::getCompleteTokenCheckpoint(const char *P) const { - const char *Result = nullptr; - for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) { - const char *C = CheckPoints[I]; - if (C <= P) { - Result = C; - break; - } - } - - return Result; -} - void Preprocessor::Lex(Token &Result) { ++LexLevel; >From 5a6fcfccb7c7d7859bd586b0b8c7d13f67747b38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Sun, 29 Oct 2023 09:22:47 +0100 Subject: [PATCH 20/21] Fix a typo --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index a1ca68227323469..90ab5a4927efb39 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -18,7 +18,7 @@ using namespace clang; -// Magenta is taken for 'warning'. Red is already 'error' and 'cya' +// Magenta is taken for 'warning'. Red is already 'error' and 'cyan' // is already taken for 'note'. Green is already used to underline // source ranges. White and black are bad because of the usual // terminal backgrounds. Which leaves us only with TWO options. >From 05a9dedcfb3a50c13be2eb5ba281246d3d086745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Wed, 8 Nov 2023 11:36:51 +0100 Subject: [PATCH 21/21] Address review comment --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 90ab5a4927efb39..73d3b9f195bdcea 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -45,7 +45,7 @@ llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( if (!Buff || Buff->getBufferSize() > MaxBufferSize) return {}; - Lexer L = Lexer(FID, *Buff, SM, LangOpts); + Lexer L{FID, *Buff, SM, LangOpts}; L.SetKeepWhitespaceMode(true); size_t NTokens = 0; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits