https://github.com/sstwcw updated https://github.com/llvm/llvm-project/pull/196454
>From 2dc3b91c05eba297d6779fc0fc5ec071fdb9dd3f Mon Sep 17 00:00:00 2001 From: sstwcw <[email protected]> Date: Fri, 8 May 2026 01:05:57 +0000 Subject: [PATCH 1/2] [clang-format] Use the C style for .h files Previously, commit d7921de8027eec19 made the program use the style for C++ to fix bug #158704. The bug was mostly about C++ structures in `.h` files being parsed incorrectly. This new patch makes the program use the style for C but parse the file as C++. Fixes #167673. The downside of the patch is that constructs that are only found in C may get parsed incorrectly if they occur in a `.h` file. Headers are usually not complicated enough for this to matter. --- clang/include/clang/Format/Format.h | 6 +- clang/lib/Format/Format.cpp | 38 +++--- clang/unittests/Format/ConfigParseTest.cpp | 6 +- clang/unittests/Format/FormatTest.cpp | 138 ++++++++++++--------- 4 files changed, 109 insertions(+), 79 deletions(-) diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 98400a1609b6a..30198c024e82f 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -6493,8 +6493,10 @@ getStyle(StringRef StyleName, StringRef FileName, StringRef FallbackStyle, llvm::SourceMgr::DiagHandlerTy DiagHandler = nullptr); // Guesses the language from the ``FileName`` and ``Code`` to be formatted. -// Defaults to FormatStyle::LK_Cpp. -FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code); +// Defaults to FormatStyle::LK_Cpp. The second return value is true when the +// `.h` file can be either C or C++. +std::pair<FormatStyle::LanguageKind, bool> guessLanguage(StringRef FileName, + StringRef Code); // Returns a string representation of ``Language``. 
inline StringRef getLanguageName(FormatStyle::LanguageKind Language) { diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 2147a812e27c1..4c511723891f7 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -2447,7 +2447,10 @@ std::error_code parseConfiguration(llvm::MemoryBufferRef Config, llvm::SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt, bool IsDotHFile) { assert(Style); - FormatStyle::LanguageKind Language = Style->Language; + // Use the C style for .h files. If no C style is configured, the block down + // below will make the style fall back to the C++ one. + FormatStyle::LanguageKind Language = + IsDotHFile ? FormatStyle::LK_C : Style->Language; assert(Language != FormatStyle::LK_None); if (Config.getBuffer().trim().empty()) return make_error_code(ParseError::Success); @@ -2487,7 +2490,6 @@ std::error_code parseConfiguration(llvm::MemoryBufferRef Config, int LanguagePos = -1; // Position of the style for Language. int CppPos = -1; // Position of the style for C++. - int CPos = -1; // Position of the style for C. // Search Styles for Language and store the positions of C++ and C styles in // case Language is not found. @@ -2499,19 +2501,14 @@ std::error_code parseConfiguration(llvm::MemoryBufferRef Config, } if (Lang == FormatStyle::LK_Cpp) CppPos = I; - else if (Lang == FormatStyle::LK_C) - CPos = I; } // If Language is not found, use the default style if there is one. Otherwise, - // use the C style for C++ .h files and for backward compatibility, the C++ - // style for .c files. + // for backward compatibility, the C++ style for .c and .h files. if (LanguagePos < 0) { if (Styles[0].Language == FormatStyle::LK_None) // Default style. 
LanguagePos = 0; - else if (IsDotHFile && Language == FormatStyle::LK_Cpp) - LanguagePos = CPos; - else if (!IsDotHFile && Language == FormatStyle::LK_C) + else if (Language == FormatStyle::LK_C) LanguagePos = CppPos; if (LanguagePos < 0) return make_error_code(ParseError::Unsuitable); @@ -2527,6 +2524,10 @@ std::error_code parseConfiguration(llvm::MemoryBufferRef Config, Style->Language = Language; Style->StyleSet.Add(*Style); } + // When the styles for C and C++ are both configured, C++ structures in .h + // files should still be recognized. + if (IsDotHFile && CppPos >= 0) + Style->Language = FormatStyle::LK_Cpp; if (Style->InsertTrailingCommas != FormatStyle::TCS_None && (Style->PackArguments.BinPack == FormatStyle::BPAS_BinPack || @@ -4573,26 +4574,29 @@ static FormatStyle::LanguageKind getLanguageByComment(const Environment &Env) { return FormatStyle::LK_None; } -FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) { +std::pair<FormatStyle::LanguageKind, bool> guessLanguage(StringRef FileName, + StringRef Code) { const auto GuessedLanguage = getLanguageByFileName(FileName); if (GuessedLanguage == FormatStyle::LK_Cpp) { auto Extension = llvm::sys::path::extension(FileName); + const bool IsH = Extension == ".h"; // If there's no file extension (or it's .h), we need to check the contents // of the code to see if it contains Objective-C. - if (!Code.empty() && (Extension.empty() || Extension == ".h")) { + if (!Code.empty() && (Extension.empty() || IsH)) { auto NonEmptyFileName = FileName.empty() ? 
"guess.h" : FileName; Environment Env(Code, NonEmptyFileName, /*Ranges=*/{}); if (const auto Language = getLanguageByComment(Env); Language != FormatStyle::LK_None) { - return Language; + return {Language, false}; } ObjCHeaderStyleGuesser Guesser(Env, getLLVMStyle()); Guesser.process(); if (Guesser.isObjC()) - return FormatStyle::LK_ObjC; + return {FormatStyle::LK_ObjC, false}; } + return {GuessedLanguage, IsH}; } - return GuessedLanguage; + return {GuessedLanguage, false}; } // Update StyleOptionHelpDescription above when changing this. @@ -4622,7 +4626,9 @@ Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, llvm::vfs::FileSystem *FS, bool AllowUnknownOptions, llvm::SourceMgr::DiagHandlerTy DiagHandler) { - FormatStyle Style = getLLVMStyle(guessLanguage(FileName, Code)); + // Only true for .h files that don't have the language line. + const auto [GuessedLanguage, IsDotHFile] = guessLanguage(FileName, Code); + FormatStyle Style = getLLVMStyle(GuessedLanguage); FormatStyle FallbackStyle = getNoStyle(); if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle)) return make_string_error("Invalid fallback style: " + FallbackStyleName); @@ -4649,8 +4655,6 @@ Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, FS = llvm::vfs::getRealFileSystem().get(); assert(FS); - const bool IsDotHFile = FileName.ends_with(".h"); - // User provided clang-format file using -style=file:path/to/format/file. if (Style.InheritConfig.empty() && StyleName.starts_with_insensitive("file:")) { diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp index 64b0e8702872c..32c3cefde0dbd 100644 --- a/clang/unittests/Format/ConfigParseTest.cpp +++ b/clang/unittests/Format/ConfigParseTest.cpp @@ -1482,16 +1482,20 @@ TEST(ConfigParseTest, HandleDotHFile) { ParseError::Success); EXPECT_EQ(Style.Language, FormatStyle::LK_C); + // Different styles are configured for C++ and C. 
The program should recognize + // C++ structures but use the C style for .h files. Style = {}; Style.Language = FormatStyle::LK_Cpp; EXPECT_EQ(parseConfiguration("Language: Cpp\n" "...\n" - "Language: C", + "Language: C\n" + "IndentWidth: 4", &Style, /*AllowUnknownOptions=*/false, /*IsDotHFile=*/true), ParseError::Success); EXPECT_EQ(Style.Language, FormatStyle::LK_Cpp); + EXPECT_EQ(Style.IndentWidth, 4u); } TEST(ConfigParseTest, UsesLanguageForBasedOnStyle) { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index f5e496652e15e..e66492f738b7d 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -22411,95 +22411,105 @@ TEST_F(FormatTest, StructuredBindings) { } TEST_F(FormatTest, FileAndCode) { - EXPECT_EQ(FormatStyle::LK_C, guessLanguage("foo.c", "")); - EXPECT_EQ(FormatStyle::LK_C, guessLanguage("foo.c.in", "")); - EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.cc", "")); - EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.m", "")); - EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.mm", "")); - EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "")); + EXPECT_EQ(FormatStyle::LK_C, guessLanguage("foo.c", "").first); + EXPECT_EQ(FormatStyle::LK_C, guessLanguage("foo.c.in", "").first); + EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.cc", "").first); + EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.m", "").first); + EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.mm", "").first); + EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "").first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "@interface Foo\n@end")); + guessLanguage("foo.h", "@interface Foo\n@end").first); EXPECT_EQ( FormatStyle::LK_ObjC, - guessLanguage("foo.h", "#define TRY(x, y) @try { x; } @finally { y; }")); + guessLanguage("foo.h", "#define TRY(x, y) @try { x; } @finally { y; }") + .first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "#define AVAIL(x) @available(x, 
*))")); - EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.h", "@class Foo;")); - EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo", "")); - EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo", "@interface Foo\n@end")); + guessLanguage("foo.h", "#define AVAIL(x) @available(x, *))").first); + EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.h", "@class Foo;").first); + EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo", "").first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "int DoStuff(CGRect rect);")); + guessLanguage("foo", "@interface Foo\n@end").first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage( - "foo.h", "#define MY_POINT_MAKE(x, y) CGPointMake((x), (y));")); + guessLanguage("foo.h", "int DoStuff(CGRect rect);").first); + EXPECT_EQ(FormatStyle::LK_ObjC, + guessLanguage("foo.h", + "#define MY_POINT_MAKE(x, y) CGPointMake((x), (y));") + .first); EXPECT_EQ( FormatStyle::LK_Cpp, - guessLanguage("foo.h", "#define FOO(...) auto bar = [] __VA_ARGS__;")); + guessLanguage("foo.h", "#define FOO(...) auto bar = [] __VA_ARGS__;") + .first); // Only one of the two preprocessor regions has ObjC-like code. 
EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.h", "#if A\n" "#define B() C\n" "#else\n" "#define B() [NSString a:@\"\"]\n" - "#endif")); + "#endif") + .first); } TEST_F(FormatTest, GuessLanguageWithCpp11AttributeSpecifiers) { - EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "[[noreturn]];")); + EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "[[noreturn]];").first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "array[[calculator getIndex]];")); - EXPECT_EQ(FormatStyle::LK_Cpp, - guessLanguage("foo.h", "[[noreturn, deprecated(\"so sorry\")]];")); + guessLanguage("foo.h", "array[[calculator getIndex]];").first); EXPECT_EQ( FormatStyle::LK_Cpp, - guessLanguage("foo.h", "[[noreturn, deprecated(\"gone, sorry\")]];")); + guessLanguage("foo.h", "[[noreturn, deprecated(\"so sorry\")]];").first); + EXPECT_EQ(FormatStyle::LK_Cpp, + guessLanguage("foo.h", "[[noreturn, deprecated(\"gone, sorry\")]];") + .first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "[[noreturn foo] bar];")); + guessLanguage("foo.h", "[[noreturn foo] bar];").first); EXPECT_EQ(FormatStyle::LK_Cpp, - guessLanguage("foo.h", "[[clang::fallthrough]];")); + guessLanguage("foo.h", "[[clang::fallthrough]];").first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "[[clang:fallthrough] foo];")); + guessLanguage("foo.h", "[[clang:fallthrough] foo];").first); EXPECT_EQ(FormatStyle::LK_Cpp, - guessLanguage("foo.h", "[[gsl::suppress(\"type\")]];")); + guessLanguage("foo.h", "[[gsl::suppress(\"type\")]];").first); EXPECT_EQ(FormatStyle::LK_Cpp, - guessLanguage("foo.h", "[[using clang: fallthrough]];")); + guessLanguage("foo.h", "[[using clang: fallthrough]];").first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "[[abusing clang:fallthrough] bar];")); + guessLanguage("foo.h", "[[abusing clang:fallthrough] bar];").first); EXPECT_EQ(FormatStyle::LK_Cpp, - guessLanguage("foo.h", "[[using gsl: suppress(\"type\")]];")); - EXPECT_EQ( - FormatStyle::LK_Cpp, - 
guessLanguage("foo.h", "for (auto &&[endpoint, stream] : streams_)")); + guessLanguage("foo.h", "[[using gsl: suppress(\"type\")]];").first); + EXPECT_EQ(FormatStyle::LK_Cpp, + guessLanguage("foo.h", "for (auto &&[endpoint, stream] : streams_)") + .first); EXPECT_EQ( FormatStyle::LK_Cpp, guessLanguage("foo.h", - "[[clang::callable_when(\"unconsumed\", \"unknown\")]]")); - EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "[[foo::bar, ...]]")); + "[[clang::callable_when(\"unconsumed\", \"unknown\")]]") + .first); + EXPECT_EQ(FormatStyle::LK_Cpp, + guessLanguage("foo.h", "[[foo::bar, ...]]").first); } TEST_F(FormatTest, GuessLanguageWithCaret) { - EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "FOO(^);")); - EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "FOO(^, Bar);")); - EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "int(^)(char, float);")); + EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "FOO(^);").first); + EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "FOO(^, Bar);").first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "int(^foo)(char, float);")); + guessLanguage("foo.h", "int(^)(char, float);").first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "int(^foo[10])(char, float);")); + guessLanguage("foo.h", "int(^foo)(char, float);").first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "int(^foo[kNumEntries])(char, float);")); + guessLanguage("foo.h", "int(^foo[10])(char, float);").first); + EXPECT_EQ( + FormatStyle::LK_ObjC, + guessLanguage("foo.h", "int(^foo[kNumEntries])(char, float);").first); EXPECT_EQ( FormatStyle::LK_ObjC, - guessLanguage("foo.h", "int(^foo[(kNumEntries + 10)])(char, float);")); + guessLanguage("foo.h", "int(^foo[(kNumEntries + 10)])(char, float);") + .first); } TEST_F(FormatTest, GuessLanguageWithPragmas) { EXPECT_EQ(FormatStyle::LK_Cpp, - guessLanguage("foo.h", "__pragma(warning(disable:))")); + guessLanguage("foo.h", "__pragma(warning(disable:))").first); 
EXPECT_EQ(FormatStyle::LK_Cpp, - guessLanguage("foo.h", "#pragma(warning(disable:))")); + guessLanguage("foo.h", "#pragma(warning(disable:))").first); EXPECT_EQ(FormatStyle::LK_Cpp, - guessLanguage("foo.h", "_Pragma(warning(disable:))")); + guessLanguage("foo.h", "_Pragma(warning(disable:))").first); } TEST_F(FormatTest, FormatsInlineAsmSymbolicNames) { @@ -22532,63 +22542,73 @@ TEST_F(FormatTest, GuessedLanguageWithInlineAsmClobbers) { " asm (\"mov %[e], %[d]\"\n" " : [d] \"=rm\" (d)\n" " [e] \"rm\" (*e));\n" - "}")); + "}") + .first); EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "void f() {\n" " _asm (\"mov %[e], %[d]\"\n" " : [d] \"=rm\" (d)\n" " [e] \"rm\" (*e));\n" - "}")); + "}") + .first); EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "void f() {\n" " __asm (\"mov %[e], %[d]\"\n" " : [d] \"=rm\" (d)\n" " [e] \"rm\" (*e));\n" - "}")); + "}") + .first); EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "void f() {\n" " __asm__ (\"mov %[e], %[d]\"\n" " : [d] \"=rm\" (d)\n" " [e] \"rm\" (*e));\n" - "}")); + "}") + .first); EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "void f() {\n" " asm (\"mov %[e], %[d]\"\n" " : [d] \"=rm\" (d),\n" " [e] \"rm\" (*e));\n" - "}")); + "}") + .first); EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.h", "void f() {\n" " asm volatile (\"mov %[e], %[d]\"\n" " : [d] \"=rm\" (d)\n" " [e] \"rm\" (*e));\n" - "}")); + "}") + .first); } TEST_F(FormatTest, GuessLanguageWithChildLines) { EXPECT_EQ(FormatStyle::LK_Cpp, - guessLanguage("foo.h", "#define FOO ({ std::string s; })")); + guessLanguage("foo.h", "#define FOO ({ std::string s; })").first); EXPECT_EQ(FormatStyle::LK_ObjC, - guessLanguage("foo.h", "#define FOO ({ NSString *s; })")); + guessLanguage("foo.h", "#define FOO ({ NSString *s; })").first); EXPECT_EQ( FormatStyle::LK_Cpp, - guessLanguage("foo.h", "#define FOO ({ foo(); ({ std::string s; }) })")); + guessLanguage("foo.h", "#define FOO ({ foo(); ({ std::string s; }) })") + .first); EXPECT_EQ( 
FormatStyle::LK_ObjC, - guessLanguage("foo.h", "#define FOO ({ foo(); ({ NSString *s; }) })")); + guessLanguage("foo.h", "#define FOO ({ foo(); ({ NSString *s; }) })") + .first); } TEST_F(FormatTest, GetLanguageByComment) { - EXPECT_EQ(FormatStyle::LK_C, + EXPECT_EQ(std::make_pair(FormatStyle::LK_Cpp, true), + guessLanguage("foo.h", "int i;")); + EXPECT_EQ(std::make_pair(FormatStyle::LK_C, false), guessLanguage("foo.h", "// clang-format Language: C\n" "int i;")); - EXPECT_EQ(FormatStyle::LK_C, + EXPECT_EQ(std::make_pair(FormatStyle::LK_C, false), guessLanguage("foo.h.in", "// clang-format Language: C\n" "int i;")); - EXPECT_EQ(FormatStyle::LK_Cpp, + EXPECT_EQ(std::make_pair(FormatStyle::LK_Cpp, false), guessLanguage("foo.h", "// clang-format Language: Cpp\n" "int DoStuff(CGRect rect);")); - EXPECT_EQ(FormatStyle::LK_ObjC, + EXPECT_EQ(std::make_pair(FormatStyle::LK_ObjC, false), guessLanguage("foo.h", "// clang-format Language: ObjC\n" "int i;")); } >From 7c34a654698a6d5f4a86f2142a9a76403362fb25 Mon Sep 17 00:00:00 2001 From: sstwcw <[email protected]> Date: Sun, 10 May 2026 02:41:36 +0000 Subject: [PATCH 2/2] Handle the default style --- clang/lib/Format/Format.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 4c511723891f7..8cd30d35f915f 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -2447,10 +2447,7 @@ std::error_code parseConfiguration(llvm::MemoryBufferRef Config, llvm::SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt, bool IsDotHFile) { assert(Style); - // Use the C style for .h files. If no C style is configured, the block down - // below will make the style fall back to the C++ one. - FormatStyle::LanguageKind Language = - IsDotHFile ? 
FormatStyle::LK_C : Style->Language; + FormatStyle::LanguageKind Language = Style->Language; assert(Language != FormatStyle::LK_None); if (Config.getBuffer().trim().empty()) return make_error_code(ParseError::Success); @@ -2495,7 +2492,9 @@ std::error_code parseConfiguration(llvm::MemoryBufferRef Config, // case Language is not found. for (unsigned I = 0; I < StyleCount; ++I) { const auto Lang = Styles[I].Language; - if (Lang == Language) { + // Use the C style for .h files. If no C style is configured, the block down + // below will make the style fall back to the C++ one. + if (Lang == (IsDotHFile ? FormatStyle::LK_C : Language)) { LanguagePos = I; break; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
