Author: Michael Jabbour
Date: 2025-07-28T17:42:30+08:00
New Revision: 6c2caa63d7d2929765199a66a61660f5372f01c7

URL: 
https://github.com/llvm/llvm-project/commit/6c2caa63d7d2929765199a66a61660f5372f01c7
DIFF: 
https://github.com/llvm/llvm-project/commit/6c2caa63d7d2929765199a66a61660f5372f01c7.diff

LOG: [Serialization] Fix crash while lazy-loading template specializations 
(#150430)

## Problem

This is a regression that was observed in Clang 20 on modules code that
uses import std.

The lazy-loading mechanism for template specializations introduced in
#119333 can currently load additional nodes when called multiple times,
which breaks assumptions made by code that iterates over
specializations. This leads to iterator invalidation crashes in some
scenarios.

The core issue occurs when:
1. Code calls `spec_begin()` to get an iterator over template
specializations. This invokes `LoadLazySpecializations()`.
2. Code then calls `spec_end()` to get the end iterator.
3. During the `spec_end()` call, `LoadExternalSpecializations()` is
invoked again.
4. This can load additional specializations for certain cases,
invalidating the begin iterator returned in 1.

I was able to trigger the problem when constructing a ParentMapContext.
The regression test demonstrates two ways to trigger the construction of
the ParentMapContext on problematic code:
- The ArrayBoundV2 checker
- Unsigned overflow detection in sanitized builds

Unfortunately, simply dumping the ast (e.g. using `-ast-dump-all`)
doesn't trigger the crash because dumping requires completing the redecl
chain before iterating over the specializations.

## Solution

The fix ensures that the redeclaration chain is always completed
**before** loading external specializations by calling
`CompleteRedeclChain(D)` at the start of
`LoadExternalSpecializations()`. The idea is to ensure that all
`SpecLookups` are fully known and loaded before the call to
`LoadExternalSpecializationsImpl()`.

Added: 
    clang/test/Modules/specializations-lazy-load-parentmap-crash.cpp

Modified: 
    clang/lib/Serialization/ASTReader.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 10aedb68fcd9d..f896f9f11c2b3 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -8488,6 +8488,7 @@ bool 
ASTReader::LoadExternalSpecializationsImpl(SpecLookupTableTy &SpecLookups,
 bool ASTReader::LoadExternalSpecializations(const Decl *D, bool OnlyPartial) {
   assert(D);
 
+  CompleteRedeclChain(D);
   bool NewSpecsFound =
       LoadExternalSpecializationsImpl(PartialSpecializationsLookups, D);
   if (OnlyPartial)

diff  --git a/clang/test/Modules/specializations-lazy-load-parentmap-crash.cpp 
b/clang/test/Modules/specializations-lazy-load-parentmap-crash.cpp
new file mode 100644
index 0000000000000..bd07ada631355
--- /dev/null
+++ b/clang/test/Modules/specializations-lazy-load-parentmap-crash.cpp
@@ -0,0 +1,99 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file --leading-lines %s %t
+//
+// Prepare the BMIs.
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu 
-emit-module-interface -o %t/mod_a-part1.pcm %t/mod_a-part1.cppm
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu 
-emit-module-interface -o %t/mod_a-part2.pcm %t/mod_a-part2.cppm
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu 
-emit-module-interface -o %t/mod_a.pcm %t/mod_a.cppm 
-fmodule-file=mod_a:part2=%t/mod_a-part2.pcm 
-fmodule-file=mod_a:part1=%t/mod_a-part1.pcm
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu 
-emit-module-interface -o %t/mod_b.pcm %t/mod_b.cppm 
-fmodule-file=mod_a:part2=%t/mod_a-part2.pcm -fmodule-file=mod_a=%t/mod_a.pcm 
-fmodule-file=mod_a:part1=%t/mod_a-part1.pcm
+
+// Below are two examples to trigger the construction of the parent map (which 
is necessary to trigger the bug this regression test is for).
+// Using ArrayBoundV2 checker:
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -analyze 
-analyzer-checker=security,alpha.security -analyzer-output=text 
%t/test-array-bound-v2.cpp -fmodule-file=mod_a:part2=%t/mod_a-part2.pcm 
-fmodule-file=mod_a=%t/mod_a.pcm -fmodule-file=mod_a:part1=%t/mod_a-part1.pcm 
-fmodule-file=mod_b=%t/mod_b.pcm
+// Using a sanitized build:
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu 
-fsanitize=unsigned-integer-overflow 
-fsanitize-undefined-ignore-overflow-pattern=all -emit-llvm -o %t/ignored 
%t/test-sanitized-build.cpp -fmodule-file=mod_a:part2=%t/mod_a-part2.pcm 
-fmodule-file=mod_a=%t/mod_a.pcm -fmodule-file=mod_a:part1=%t/mod_a-part1.pcm 
-fmodule-file=mod_b=%t/mod_b.pcm
+
+//--- mod_a-part1.cppm
+module;
+namespace mod_a {
+template <int> struct Important;
+}
+
+namespace mod_a {
+Important<0>& instantiate1();
+} // namespace mod_a
+export module mod_a:part1;
+
+export namespace mod_a {
+using ::mod_a::instantiate1;
+}
+
+//--- mod_a-part2.cppm
+module;
+namespace mod_a {
+template <int> struct Important;
+}
+
+namespace mod_a {
+template <int N> Important<N>& instantiate2();
+namespace part2InternalInstantiations {
+// During the construction of the parent map, we iterate over 
ClassTemplateDecl::specializations() for 'Important'.
+// After GH119333, the following instantiations get loaded between the call to 
spec_begin() and spec_end().
+// This used to invalidate the begin iterator returned by spec_begin() by the 
time the end iterator is returned.
+// This is a regression test for that.
+Important<1> fn1();
+Important<2> fn2();
+Important<3> fn3();
+Important<4> fn4();
+Important<5> fn5();
+Important<6> fn6();
+Important<7> fn7();
+Important<8> fn8();
+Important<9> fn9();
+Important<10> fn10();
+Important<11> fn11();
+}
+} // namespace mod_a
+export module mod_a:part2;
+
+export namespace mod_a {
+using ::mod_a::instantiate2;
+}
+
+//--- mod_a.cppm
+export module mod_a;
+export import :part1;
+export import :part2;
+
+//--- mod_b.cppm
+export module mod_b;
+import mod_a;
+
+void a() {
+  mod_a::instantiate1();
+  mod_a::instantiate2<42>();
+}
+
+//--- test-array-bound-v2.cpp
+import mod_b;
+
+extern void someFunc(char* first, char* last);
+void triggerParentMapContextCreationThroughArrayBoundV2() {
+  // This code currently causes the ArrayBoundV2 checker to create the 
ParentMapContext.
+  // Once it detects an access to buf[100], the checker looks through the 
parents to find '&' operator.
+  // (this is needed since taking the address of past-the-end pointer is 
allowed by the checker)
+  char buf[100];
+  someFunc(&buf[0], &buf[100]);
+}
+
+//--- test-sanitized-build.cpp
+import mod_b;
+
+extern void some();
+void triggerParentMapContextCreationThroughSanitizedBuild(unsigned i) {
+  // This code currently causes UBSan to create the ParentMapContext.
+  // UBSan currently excludes the pattern below to avoid noise, and it relies 
on ParentMapContext to detect it.
+  while (i--)
+    some();
+}


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to