[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-06-03 Thread Ilya Biryukov via cfe-commits

https://github.com/ilya-biryukov closed 
https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-06-03 Thread Ilya Biryukov via cfe-commits

https://github.com/ilya-biryukov updated 
https://github.com/llvm/llvm-project/pull/79875

>From db8b091568c924a9550052ea643f718ac11d3e73 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov 
Date: Mon, 29 Jan 2024 18:55:53 +0100
Subject: [PATCH 1/4] [Serialization] Check for stack exhaustion when reading
 declarations

Particular example that lead to this is a very long chain of
`UsingShadowDecl`s that we hit in our codebase in generated code.

To avoid that, check for stack exhaustion when deserializing the
declaration. At that point, we can point to source location of a
particular declaration that is being deserialized.
---
 clang/lib/Serialization/ASTReaderDecl.cpp |  4 +-
 .../Modules/lots-of-using-shadow-decls.cpp| 43 +++
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Modules/lots-of-using-shadow-decls.cpp

diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp 
b/clang/lib/Serialization/ASTReaderDecl.cpp
index 61cc99d4df687..ba363c0d773a1 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -4127,7 +4127,9 @@ Decl *ASTReader::ReadDeclRecord(GlobalDeclID ID) {
   // calls to Decl::getASTContext() by Decl's methods will find the
   // TranslationUnitDecl without crashing.
   D->setDeclContext(Context.getTranslationUnitDecl());
-  Reader.Visit(D);
+
+  // Reading some declarations can result in deep recursion.
+  SemaObj->runWithSufficientStackSpace(DeclLoc, [&] { Reader.Visit(D); });
 
   // If this declaration is also a declaration context, get the
   // offsets for its tables of lexical and visible declarations.
diff --git a/clang/test/Modules/lots-of-using-shadow-decls.cpp 
b/clang/test/Modules/lots-of-using-shadow-decls.cpp
new file mode 100644
index 0..c3048352842e3
--- /dev/null
+++ b/clang/test/Modules/lots-of-using-shadow-decls.cpp
@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/usings.cppm -o 
%t/usings.pcm
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=usings=%t/usings.pcm %t/use.cpp 
-verify -fsyntax-only -Wno-stack-exhausted
+
+// expected-no-diagnostics
+
+//--- usings.cppm
+export module usings;
+
+#define TYPES1(NAME) DECLARE(NAME##a) DECLARE(NAME##b) DECLARE(NAME##c) \
+DECLARE(NAME##d) DECLARE(NAME##e) DECLARE(NAME##f) DECLARE(NAME##g) \
+DECLARE(NAME##h) DECLARE(NAME##i) DECLARE(NAME##j) 
+#define TYPES2(NAME) TYPES1(NAME##a) TYPES1(NAME##b) TYPES1(NAME##c) \
+TYPES1(NAME##d) TYPES1(NAME##e) TYPES1(NAME##f) TYPES1(NAME##g) \
+TYPES1(NAME##h) TYPES1(NAME##i) TYPES1(NAME##j) 
+#define TYPES3(NAME) TYPES2(NAME##a) TYPES2(NAME##b) TYPES2(NAME##c) \
+TYPES2(NAME##d) TYPES2(NAME##e) TYPES2(NAME##f) TYPES2(NAME##g) \
+TYPES2(NAME##h) TYPES2(NAME##i) TYPES2(NAME##j) 
+#define TYPES4(NAME) TYPES3(NAME##a) TYPES3(NAME##b) TYPES3(NAME##c) \
+TYPES3(NAME##d) TYPES3(NAME##e) TYPES3(NAME##f) TYPES3(NAME##g)
+
+#define DECLARE(NAME) struct NAME {};
+TYPES4(Type)
+
+export struct Base {
+#undef DECLARE
+#define DECLARE(NAME) void func(NAME*);
+TYPES4(Type)
+};
+
+export struct Derived : Base {
+using Base::func;
+};
+
+//--- use.cpp
+import usings;
+void test() {
+Derived().func(nullptr); // expected-error{{ambiguous}}
+// expected-note@* + {{candidate function}}
+}

>From fac778f48eca1cf7ec1456465f5c7a7b0691323d Mon Sep 17 00:00:00 2001
From: Ilya Biryukov 
Date: Wed, 29 May 2024 17:05:18 +0200
Subject: [PATCH 2/4] fixup! [Serialization] Check for stack exhaustion when
 reading declarations

Use clang::runWithSufficientStackSpace, fallback to Sema only when
available.
---
 clang/include/clang/Serialization/ASTReader.h |  5 +
 clang/lib/Serialization/ASTReader.cpp | 15 +++
 clang/lib/Serialization/ASTReaderDecl.cpp |  4 +++-
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index 4ece4593f0738..bea58d60d36c4 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -429,6 +429,9 @@ class ASTReader
   FileManager 
   const PCHContainerReader 
   DiagnosticsEngine 
+  // Sema has duplicate logic, but SemaObj can sometimes be null so ASTReader
+  // has its own version.
+  bool WarnedStackExhausted = false;
 
   /// The semantic analysis object that will be processing the
   /// AST files and the translation unit that uses it.
@@ -2135,6 +2138,8 @@ class ASTReader
   /// Report a diagnostic.
   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const;
 
+  void warnStackExhausted(SourceLocation Loc);
+
   IdentifierInfo *DecodeIdentifierInfo(serialization::IdentifierID ID);
 
   IdentifierInfo *readIdentifier(ModuleFile , const RecordData ,
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 

[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-06-03 Thread Ilya Biryukov via cfe-commits

ilya-biryukov wrote:

I have tried `ulimit` and found that Clang relies on a very intricate balance 
between the stack size, the desired stack size (the cutoff at which we create a 
new thread with a larger stack) and the points in code where we call 
`runWithSufficientStackSpace`.

If the stack limit is very small, we crash on some other code path or even do 
something weird in the code path checking for stack exhaustion (it currently 
checks if there's less than 256K stack available, clearly not the right thing 
if I request a stack of 128K with ulimit in the first place).
If the stack limit is larger, the amount of code required to trigger the crash 
is high and the test is too slow for checking in (IMO).

I think @ChuanqiXu9 's suggestion of making the parameters 
(desireableStackSize, in particular) of these checks customizable for unit 
testing would allow to have a more fine-grained knob and actually have a test 
for this. I'll try to experiment with this approach and report my findings.

In the meantime, I have addressed the last comment and plan to land this 
without tests for now.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-06-03 Thread Ilya Biryukov via cfe-commits

https://github.com/ilya-biryukov updated 
https://github.com/llvm/llvm-project/pull/79875

>From 6e474e393e63fd1cb5f3b0bea3c971b96591c57f Mon Sep 17 00:00:00 2001
From: Ilya Biryukov 
Date: Mon, 29 Jan 2024 18:55:53 +0100
Subject: [PATCH 1/4] [Serialization] Check for stack exhaustion when reading
 declarations

Particular example that lead to this is a very long chain of
`UsingShadowDecl`s that we hit in our codebase in generated code.

To avoid that, check for stack exhaustion when deserializing the
declaration. At that point, we can point to source location of a
particular declaration that is being deserialized.
---
 clang/lib/Serialization/ASTReaderDecl.cpp |  4 +-
 .../Modules/lots-of-using-shadow-decls.cpp| 43 +++
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Modules/lots-of-using-shadow-decls.cpp

diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp 
b/clang/lib/Serialization/ASTReaderDecl.cpp
index 61cc99d4df687..ba363c0d773a1 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -4127,7 +4127,9 @@ Decl *ASTReader::ReadDeclRecord(GlobalDeclID ID) {
   // calls to Decl::getASTContext() by Decl's methods will find the
   // TranslationUnitDecl without crashing.
   D->setDeclContext(Context.getTranslationUnitDecl());
-  Reader.Visit(D);
+
+  // Reading some declarations can result in deep recursion.
+  SemaObj->runWithSufficientStackSpace(DeclLoc, [&] { Reader.Visit(D); });
 
   // If this declaration is also a declaration context, get the
   // offsets for its tables of lexical and visible declarations.
diff --git a/clang/test/Modules/lots-of-using-shadow-decls.cpp 
b/clang/test/Modules/lots-of-using-shadow-decls.cpp
new file mode 100644
index 0..c3048352842e3
--- /dev/null
+++ b/clang/test/Modules/lots-of-using-shadow-decls.cpp
@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/usings.cppm -o 
%t/usings.pcm
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=usings=%t/usings.pcm %t/use.cpp 
-verify -fsyntax-only -Wno-stack-exhausted
+
+// expected-no-diagnostics
+
+//--- usings.cppm
+export module usings;
+
+#define TYPES1(NAME) DECLARE(NAME##a) DECLARE(NAME##b) DECLARE(NAME##c) \
+DECLARE(NAME##d) DECLARE(NAME##e) DECLARE(NAME##f) DECLARE(NAME##g) \
+DECLARE(NAME##h) DECLARE(NAME##i) DECLARE(NAME##j) 
+#define TYPES2(NAME) TYPES1(NAME##a) TYPES1(NAME##b) TYPES1(NAME##c) \
+TYPES1(NAME##d) TYPES1(NAME##e) TYPES1(NAME##f) TYPES1(NAME##g) \
+TYPES1(NAME##h) TYPES1(NAME##i) TYPES1(NAME##j) 
+#define TYPES3(NAME) TYPES2(NAME##a) TYPES2(NAME##b) TYPES2(NAME##c) \
+TYPES2(NAME##d) TYPES2(NAME##e) TYPES2(NAME##f) TYPES2(NAME##g) \
+TYPES2(NAME##h) TYPES2(NAME##i) TYPES2(NAME##j) 
+#define TYPES4(NAME) TYPES3(NAME##a) TYPES3(NAME##b) TYPES3(NAME##c) \
+TYPES3(NAME##d) TYPES3(NAME##e) TYPES3(NAME##f) TYPES3(NAME##g)
+
+#define DECLARE(NAME) struct NAME {};
+TYPES4(Type)
+
+export struct Base {
+#undef DECLARE
+#define DECLARE(NAME) void func(NAME*);
+TYPES4(Type)
+};
+
+export struct Derived : Base {
+using Base::func;
+};
+
+//--- use.cpp
+import usings;
+void test() {
+Derived().func(nullptr); // expected-error{{ambiguous}}
+// expected-note@* + {{candidate function}}
+}

>From ede787adb74ded8a5145facd9518fe3d1824604a Mon Sep 17 00:00:00 2001
From: Ilya Biryukov 
Date: Wed, 29 May 2024 17:05:18 +0200
Subject: [PATCH 2/4] fixup! [Serialization] Check for stack exhaustion when
 reading declarations

Use clang::runWithSufficientStackSpace, fallback to Sema only when
available.
---
 clang/include/clang/Serialization/ASTReader.h |  5 +
 clang/lib/Serialization/ASTReader.cpp | 15 +++
 clang/lib/Serialization/ASTReaderDecl.cpp |  4 +++-
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index 4ece4593f0738..bea58d60d36c4 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -429,6 +429,9 @@ class ASTReader
   FileManager 
   const PCHContainerReader 
   DiagnosticsEngine 
+  // Sema has duplicate logic, but SemaObj can sometimes be null so ASTReader
+  // has its own version.
+  bool WarnedStackExhausted = false;
 
   /// The semantic analysis object that will be processing the
   /// AST files and the translation unit that uses it.
@@ -2135,6 +2138,8 @@ class ASTReader
   /// Report a diagnostic.
   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const;
 
+  void warnStackExhausted(SourceLocation Loc);
+
   IdentifierInfo *DecodeIdentifierInfo(serialization::IdentifierID ID);
 
   IdentifierInfo *readIdentifier(ModuleFile , const RecordData ,
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 

[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-06-03 Thread Ilya Biryukov via cfe-commits


@@ -9403,6 +9404,20 @@ DiagnosticBuilder ASTReader::Diag(SourceLocation Loc, 
unsigned DiagID) const {
   return Diags.Report(Loc, DiagID);
 }
 
+void ASTReader::warnStackExhausted(SourceLocation Loc) {
+  // When Sema is available, avoid duplicate errors. This should be rare.

ilya-biryukov wrote:

Good catch, I don't think this would necessarily hold in practice in the next 
few years.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-29 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 edited 
https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-29 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 approved this pull request.

If we don't insist on testing this in this commit, them LGTM.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-29 Thread Chuanqi Xu via cfe-commits


@@ -9403,6 +9404,20 @@ DiagnosticBuilder ASTReader::Diag(SourceLocation Loc, 
unsigned DiagID) const {
   return Diags.Report(Loc, DiagID);
 }
 
+void ASTReader::warnStackExhausted(SourceLocation Loc) {
+  // When Sema is available, avoid duplicate errors. This should be rare.

ChuanqiXu9 wrote:

```suggestion
  // When Sema is available, avoid duplicate errors.
```

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-29 Thread Ilya Biryukov via cfe-commits

ilya-biryukov wrote:

> there's a couple of tests that use `ulimit` 
> (`clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp` and 
> `clang/test/PCH/leakfiles.test`) - so that technique could be used to test 
> this in a way that's fast enough to check in?

oh, I didn't realize I can use `ulimit` in tests, thanks! I will try it out 
next week.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-29 Thread David Blaikie via cfe-commits

dwblaikie wrote:

(a bunch of compiler-rt tests also use ulimit, but doesn't look like any llvm 
core tests do... )

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-29 Thread David Blaikie via cfe-commits

dwblaikie wrote:

there's a couple of tests that use `ulimit` 
(`clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp` and 
`clang/test/PCH/leakfiles.test`) - so that technique could be used to test this 
in a way that's fast enough to check in?

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-29 Thread Ilya Biryukov via cfe-commits

ilya-biryukov wrote:

> Yeah, I was hoping to have it in the text of the discussion here without 
> having to do it myself since you've already got the repro locally, 
> presumably... so we can all see/discuss it. But perhaps it's not sufficiently 
> helpful/constructive to worry about - not sure.

@dwblaikie sorry, I was responding to the AST and somehow missed the stack 
part. I should have definitely included it from the start, my bad. Here it is:

Click to expand the stack trace

```text
#0  0x5b15abdd in clang::ASTReader::ReadDeclRecord(unsigned int) ()
#1  0x5bec8504 in clang::ASTReader::GetDecl(unsigned int) [clone .cold] 
()
#2  0x5b2028de in 
clang::ASTDeclReader::VisitUsingShadowDecl(clang::UsingShadowDecl*) ()
#3  0x5b164667 in clang::ASTReader::ReadDeclRecord(unsigned int) ()
#4  0x5bec8504 in clang::ASTReader::GetDecl(unsigned int) [clone .cold] 
()
#5  0x5b2028de in 
clang::ASTDeclReader::VisitUsingShadowDecl(clang::UsingShadowDecl*) ()
.
#11814 0x5b164667 in clang::ASTReader::ReadDeclRecord(unsigned int) ()
#11815 0x5bec8504 in clang::ASTReader::GetDecl(unsigned int) [clone 
.cold] ()
#11816 0x5b206e7a in 
clang::ASTDeclReader::VisitUsingDecl(clang::UsingDecl*) ()
#11817 0x5b164994 in clang::ASTReader::ReadDeclRecord(unsigned int) ()
#11818 0x5bae90d8 in 
clang::ASTReader::FindExternalLexicalDecls(clang::DeclContext const*, 
llvm::function_ref, 
llvm::SmallVectorImpl&) ()
#11819 0x5bb0bc69 in 
clang::DeclContext::LoadLexicalDeclsFromExternalStorage() const ()
#11820 0x5bb6b87d in 
clang::Sema::SetCtorInitializers(clang::CXXConstructorDecl*, bool, 
llvm::ArrayRef) ()
#11821 0x5bb6e84e in 
clang::Sema::DefineImplicitMoveConstructor(clang::SourceLocation, 
clang::CXXConstructorDecl*) ()
#11822 0x5961f28c in void llvm::function_ref::callback_fn(long) [clone 
.__uniq.244473529911816543723239142225852553696] [clone 
.llvm.16089407488121690861] ()
#11823 0x5ba821b8 in 
clang::Sema::MarkFunctionReferenced(clang::SourceLocation, 
clang::FunctionDecl*, bool) ()
#11824 0x5bda0be9 in 
clang::Sema::BuildCXXConstructExpr(clang::SourceLocation, clang::QualType, 
clang::NamedDecl*, clang::CXXConstructorDecl*, 
llvm::MutableArrayRef, bool, bool, bool, bool, 
clang::CXXConstructionKind, clang::SourceRange) ()
#11825 0x5ba1fcef in 
clang::InitializationSequence::Perform(clang::Sema&, clang::InitializedEntity 
const&, clang::InitializationKind const&, llvm::MutableArrayRef, 
clang::QualType*) ()
#11826 0x5c76f7dd in CollectFieldInitializer(clang::Sema&, (anonymous 
namespace)::BaseAndFieldInfo&, clang::FieldDecl*, clang::IndirectFieldDecl*) 
[clone .__uniq.266194649099037377127634867174551623720] [clone .cold] ()
#11827 0x5bb6c0ba in 
clang::Sema::SetCtorInitializers(clang::CXXConstructorDecl*, bool, 
llvm::ArrayRef) ()
#11828 0x5bb6e84e in 
clang::Sema::DefineImplicitMoveConstructor(clang::SourceLocation, 
clang::CXXConstructorDecl*) ()
#11829 0x5961f28c in void llvm::function_ref::callback_fn(long) [clone 
.__uniq.244473529911816543723239142225852553696] [clone 
.llvm.16089407488121690861] ()
#11830 0x5ba821b8 in 
clang::Sema::MarkFunctionReferenced(clang::SourceLocation, 
clang::FunctionDecl*, bool) ()
#11831 0x5bda0be9 in 
clang::Sema::BuildCXXConstructExpr(clang::SourceLocation, clang::QualType, 
clang::NamedDecl*, clang::CXXConstructorDecl*, 
llvm::MutableArrayRef, bool, bool, bool, bool, 
clang::CXXConstructionKind, clang::SourceRange) ()
#11832 0x5ba1fcef in 
clang::InitializationSequence::Perform(clang::Sema&, clang::InitializedEntity 
const&, clang::InitializationKind const&, llvm::MutableArrayRef, 
clang::QualType*) ()
#11833 0x5bb6dfd8 in BuildImplicitBaseInitializer(clang::Sema&, 
clang::CXXConstructorDecl*, ImplicitInitializerKind, clang::CXXBaseSpecifier*, 
bool, clang::CXXCtorInitializer*&) [clone 
.__uniq.266194649099037377127634867174551623720] ()
#11834 0x5bb6b798 in 
clang::Sema::SetCtorInitializers(clang::CXXConstructorDecl*, bool, 
llvm::ArrayRef) ()
#11835 0x5bb6e84e in 
clang::Sema::DefineImplicitMoveConstructor(clang::SourceLocation, 
clang::CXXConstructorDecl*) ()
#11836 0x5961f28c in void llvm::function_ref::callback_fn(long) [clone 
.__uniq.244473529911816543723239142225852553696] [clone 
.llvm.16089407488121690861] ()
#11837 0x5ba821b8 in 
clang::Sema::MarkFunctionReferenced(clang::SourceLocation, 
clang::FunctionDecl*, bool) ()
#11838 0x5bda0be9 in 
clang::Sema::BuildCXXConstructExpr(clang::SourceLocation, clang::QualType, 
clang::NamedDecl*, clang::CXXConstructorDecl*, 
llvm::MutableArrayRef, bool, bool, bool, bool, 
clang::CXXConstructionKind, clang::SourceRange) ()
#11839 0x5ba1fcef in 
clang::InitializationSequence::Perform(clang::Sema&, clang::InitializedEntity 
const&, 

[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-29 Thread Ilya Biryukov via cfe-commits


@@ -4099,7 +4099,9 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
   // calls to Decl::getASTContext() by Decl's methods will find the
   // TranslationUnitDecl without crashing.
   D->setDeclContext(Context.getTranslationUnitDecl());
-  Reader.Visit(D);
+
+  // Reading some declarations can result in deep recursion.
+  SemaObj->runWithSufficientStackSpace(DeclLoc, [&] { Reader.Visit(D); });

ilya-biryukov wrote:

I did not realize Sema can be null, I have updated the code as you suggested, 
but it now duplicates the logic we have in Sema to avoid showing the error 
multiple times.
Please take a look and see if the new version makes sense.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-29 Thread Ilya Biryukov via cfe-commits

https://github.com/ilya-biryukov updated 
https://github.com/llvm/llvm-project/pull/79875

>From 6e474e393e63fd1cb5f3b0bea3c971b96591c57f Mon Sep 17 00:00:00 2001
From: Ilya Biryukov 
Date: Mon, 29 Jan 2024 18:55:53 +0100
Subject: [PATCH 1/3] [Serialization] Check for stack exhaustion when reading
 declarations

Particular example that lead to this is a very long chain of
`UsingShadowDecl`s that we hit in our codebase in generated code.

To avoid that, check for stack exhaustion when deserializing the
declaration. At that point, we can point to source location of a
particular declaration that is being deserialized.
---
 clang/lib/Serialization/ASTReaderDecl.cpp |  4 +-
 .../Modules/lots-of-using-shadow-decls.cpp| 43 +++
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Modules/lots-of-using-shadow-decls.cpp

diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp 
b/clang/lib/Serialization/ASTReaderDecl.cpp
index 61cc99d4df687..ba363c0d773a1 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -4127,7 +4127,9 @@ Decl *ASTReader::ReadDeclRecord(GlobalDeclID ID) {
   // calls to Decl::getASTContext() by Decl's methods will find the
   // TranslationUnitDecl without crashing.
   D->setDeclContext(Context.getTranslationUnitDecl());
-  Reader.Visit(D);
+
+  // Reading some declarations can result in deep recursion.
+  SemaObj->runWithSufficientStackSpace(DeclLoc, [&] { Reader.Visit(D); });
 
   // If this declaration is also a declaration context, get the
   // offsets for its tables of lexical and visible declarations.
diff --git a/clang/test/Modules/lots-of-using-shadow-decls.cpp 
b/clang/test/Modules/lots-of-using-shadow-decls.cpp
new file mode 100644
index 0..c3048352842e3
--- /dev/null
+++ b/clang/test/Modules/lots-of-using-shadow-decls.cpp
@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/usings.cppm -o 
%t/usings.pcm
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=usings=%t/usings.pcm %t/use.cpp 
-verify -fsyntax-only -Wno-stack-exhausted
+
+// expected-no-diagnostics
+
+//--- usings.cppm
+export module usings;
+
+#define TYPES1(NAME) DECLARE(NAME##a) DECLARE(NAME##b) DECLARE(NAME##c) \
+DECLARE(NAME##d) DECLARE(NAME##e) DECLARE(NAME##f) DECLARE(NAME##g) \
+DECLARE(NAME##h) DECLARE(NAME##i) DECLARE(NAME##j) 
+#define TYPES2(NAME) TYPES1(NAME##a) TYPES1(NAME##b) TYPES1(NAME##c) \
+TYPES1(NAME##d) TYPES1(NAME##e) TYPES1(NAME##f) TYPES1(NAME##g) \
+TYPES1(NAME##h) TYPES1(NAME##i) TYPES1(NAME##j) 
+#define TYPES3(NAME) TYPES2(NAME##a) TYPES2(NAME##b) TYPES2(NAME##c) \
+TYPES2(NAME##d) TYPES2(NAME##e) TYPES2(NAME##f) TYPES2(NAME##g) \
+TYPES2(NAME##h) TYPES2(NAME##i) TYPES2(NAME##j) 
+#define TYPES4(NAME) TYPES3(NAME##a) TYPES3(NAME##b) TYPES3(NAME##c) \
+TYPES3(NAME##d) TYPES3(NAME##e) TYPES3(NAME##f) TYPES3(NAME##g)
+
+#define DECLARE(NAME) struct NAME {};
+TYPES4(Type)
+
+export struct Base {
+#undef DECLARE
+#define DECLARE(NAME) void func(NAME*);
+TYPES4(Type)
+};
+
+export struct Derived : Base {
+using Base::func;
+};
+
+//--- use.cpp
+import usings;
+void test() {
+Derived().func(nullptr); // expected-error{{ambiguous}}
+// expected-note@* + {{candidate function}}
+}

>From ede787adb74ded8a5145facd9518fe3d1824604a Mon Sep 17 00:00:00 2001
From: Ilya Biryukov 
Date: Wed, 29 May 2024 17:05:18 +0200
Subject: [PATCH 2/3] fixup! [Serialization] Check for stack exhaustion when
 reading declarations

Use clang::runWithSufficientStackSpace, fallback to Sema only when
available.
---
 clang/include/clang/Serialization/ASTReader.h |  5 +
 clang/lib/Serialization/ASTReader.cpp | 15 +++
 clang/lib/Serialization/ASTReaderDecl.cpp |  4 +++-
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index 4ece4593f0738..bea58d60d36c4 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -429,6 +429,9 @@ class ASTReader
   FileManager 
   const PCHContainerReader 
   DiagnosticsEngine 
+  // Sema has duplicate logic, but SemaObj can sometimes be null so ASTReader
+  // has its own version.
+  bool WarnedStackExhausted = false;
 
   /// The semantic analysis object that will be processing the
   /// AST files and the translation unit that uses it.
@@ -2135,6 +2138,8 @@ class ASTReader
   /// Report a diagnostic.
   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const;
 
+  void warnStackExhausted(SourceLocation Loc);
+
   IdentifierInfo *DecodeIdentifierInfo(serialization::IdentifierID ID);
 
   IdentifierInfo *readIdentifier(ModuleFile , const RecordData ,
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 

[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-28 Thread via cfe-commits

cor3ntin wrote:

@ilya-biryukov ping!

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-08 Thread Aaron Ballman via cfe-commits

AaronBallman wrote:

> I think we should apply @ChuanqiXu9 suggestion and merge that without tests. 
> At worse it's harmless, at best it solves an actual issue for users. We have 
> precedent for not being able to test resource exhaustion fixes.

I would not be opposed to landing with a test, but I think such a test would be 
challenging because stack exhaustion is often slow to trigger and highly 
machine-dependent (so I can imagine the test failing "randomly" for different 
configurations or under different workloads, or being a very slow test to 
execute). So if there's a simple, quick-to-execute, reliably-failing test we 
can add, then great! But if there's not, I would be fine landing without tests.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-08 Thread Ilya Biryukov via cfe-commits

ilya-biryukov wrote:

Sorry for loosing track of this change. I will go through the comments and try 
to land this some time next week.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-05-03 Thread via cfe-commits

cor3ntin wrote:

I think we should apply @ChuanqiXu9 suggestion and merge that without tests. At 
worse it's harmless, at best it solves an actual issue for users. We have 
precedent for not being able to test resource exhaustion fixes.


https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-30 Thread David Blaikie via cfe-commits

dwblaikie wrote:

> > Could you show the stack (omitting/annotating the repeated part) that leads 
> > to failure? and/or the AST shape that leads to failure?
> 
> See the test I added. All you need is ~10k overloads of a method in a class 
> and a `using Base::func` in the derived class. The AST represents that as a 
> linked list inside `UsingShadowDecl` (each shadow decl references the next 
> one). Serializing them works fine, but deserialization is recursive and blows 
> up.
> 
> Feel free to just call `-ast-dump` on the test I posted (it only crashes when 
> reading the pcm, so outputing AST should be fine)

Yeah, I was hoping to have it in the text of the discussion here without having 
to do it myself since you've already got the repro locally, presumably... so we 
can all see/discuss it. But perhaps it's not sufficiently helpful/constructive 
to worry about - not sure.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-30 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

> with stack exhaustion warning, the compiler can continue (albeit being slow 
> or unstable). The depth limit here will be a hard restriction and so there 
> will be no workaround if the code reaches it.

It is a surprise to me that this is only a warning instead of a hard error. I 
never thought it can continue when the stack runs out. Sorry for confusing.

> I am curious about performance concerns. I feel that the amount of work in 
> ASTReaderDecl is large enough that this extra call should not add any 
> noticeable overhead on a happy path (when we have enough stack). Or do you 
> think the overhead might be problematic?

In my imagination, with `ASTReader::NumCurrentElementsDeserializing? will only 
require us to insert a compare between intergers (the value of 
`NumCurrentElementsDeserializing` and the threshold) on the hot path. So it 
will be simpler than the current method. But given you said above, maybe it 
doesn't matter so much. I believe the overhead is under control.

> Except the error should ideally be happening when we write PCM, but that 
> seems hard

What's the meaning? Isn't this patch about reading?

> Just to make sure I got everything right this time. So you mean testing the 
> clang::runWithSufficientStackSpace itself? That seems very reasonable. If 
> we're talking about unit-testing the ASTReaderDecl, this seems like it's 
> quite a bit of work (too much setup code that needs to be put around it).

I mean something like:

First, refactor the signature of `clang::runWithSufficientStackSpace` to:


```
namespace clang {
clang::runWithSufficientStackSpace(function_call_back_for_diagnostic, 
function_to_run, desired_stack_size = default_value, sufficient_stack_size = 
default_value);
}
```

then add two setters to ASTReader

```
class ASTReader {
std::optional desired_stack_size;
std::optional sufficient_stack_size;
public:
 void set_desired_stack_size(size_t) {...}
 void set_sufficient_stack_size(size_t) {...}
}
```

then in the unittest under `clang/unittests/Serialization`, we can generate the 
BMI for the codes in your current patch (but with smaller scale) like other 
unit test did.

And we can try to parse the `use.cpp` in the current patch, we need to 
construct the CompilerInstance and get the ASTReader and set the corresponding 
value for desired_stack_size and sufficient_stack_size. Since we're calling 
`clang::runWithSufficientStackSpace` with a call back to the diagnostic 
function, it is easy to test the diagnostic function is called or it shouldn't 
be hard to test the actual diagnostic message.

This is the code in my mind. I don't feel it would be pretty hard. How do you 
think about it?



https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-30 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

> I am not saying it's a bad idea to add this for testing purposes, but given 
> how fragile this approach is, I think we should not provide configuration 
> knobs to users there. At least everyone sees crashes at roughly the same 
> points right now (although variation is still present because depending on 
> inlining decisions stacks sizes might vary even with the same code and same 
> limits).

Sorry for not being clear enough. What I want is to add parameters to 
`clang::runWithSufficientStackSpace` and we can invoke it and test this in 
unittest. It won't be visible to users.

BTW, how about the idea of using ASTReader::NumCurrentElementsDeserializing?


https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-30 Thread Chuanqi Xu via cfe-commits


@@ -4099,7 +4099,9 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
   // calls to Decl::getASTContext() by Decl's methods will find the
   // TranslationUnitDecl without crashing.
   D->setDeclContext(Context.getTranslationUnitDecl());
-  Reader.Visit(D);
+
+  // Reading some declarations can result in deep recursion.
+  SemaObj->runWithSufficientStackSpace(DeclLoc, [&] { Reader.Visit(D); });

ChuanqiXu9 wrote:

As I said, for example, if we're compiling a pcm file to an object file. It 
makes sense that the Sema is not needed here.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-30 Thread Ilya Biryukov via cfe-commits

ilya-biryukov wrote:

> To make this testable, maybe we can refactor 
> `clang::runWithSufficientStackSpace` a little bit to make `DesiredStackSize` 
> and `isStackNearlyExhausted::SufficientStack` configurable.

Maybe... As long as we only use this in tests.
However, for this particular case, we could also maybe track deserialization 
depth (similar to template instantiation depth).

I actually landed a patch internally that increased the stack size and it 
immediately started crashing on [another test]( 
https://github.com/llvm/llvm-project/blob/e5054fb5c660cb81d1f96498e39662914a92a167/clang-tools-extra/clangd/test/infinite-instantiation.test).
 I suspect that what happened there is that increasing the stack size causes us 
to have more recursive calls before we issue a warning and start a new thread. 
In turn, those recursive calls may lead to stack overflow before we get to the 
next call of runWithSufficientStackSpace.

I am not saying it's a bad idea to add this for testing purposes, but given how 
fragile this approach is, I think we should not provide configuration knobs to 
users there. At least everyone sees crashes at roughly the same points right 
now (although variation is still present because depending on inlining 
decisions stacks sizes might vary even with the same code and same limits).

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-30 Thread Ilya Biryukov via cfe-commits


@@ -4099,7 +4099,9 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
   // calls to Decl::getASTContext() by Decl's methods will find the
   // TranslationUnitDecl without crashing.
   D->setDeclContext(Context.getTranslationUnitDecl());
-  Reader.Visit(D);
+
+  // Reading some declarations can result in deep recursion.
+  SemaObj->runWithSufficientStackSpace(DeclLoc, [&] { Reader.Visit(D); });

ilya-biryukov wrote:

Could you elaborate in which sense it is non-meaningful for the purpose of 
calling this method?
I believe SemaObj is never null here (because it's used during deserialization 
of declarations). The only logic we have in `runWithSufficientStackSpace`, is 
to warn about stack exhaustion only once and actually show this warning.

I would still be using `Wstack-exhausted` here, not sure there is much useful 
context to add here (maybe a note about deserializing declarations, but this 
case is so rare I think we can live without it).

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-30 Thread Ilya Biryukov via cfe-commits

ilya-biryukov wrote:

> Could you show the stack (omitting/annotating the repeated part) that leads 
> to failure? and/or the AST shape that leads to failure?

See the test I added. All you need is ~10k overloads of a method in a class and 
a `using Base::func` in the derived class.\
The AST represents that as a linked list inside `UsingShadowDecl` (each shadow 
decl references the next one). Serializing them works fine, but deserialization 
is recursive and blows up.

Feel free to just call `-ast-dump` on the test I posted (it only crashes when 
reading the pcm, so outputing AST should be fine)

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-30 Thread Ilya Biryukov via cfe-commits


@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/usings.cppm -o 
%t/usings.pcm
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=usings=%t/usings.pcm %t/use.cpp 
-verify -fsyntax-only -Wno-stack-exhausted
+
+// expected-no-diagnostics
+
+//--- usings.cppm
+export module usings;
+
+#define TYPES1(NAME) DECLARE(NAME##a) DECLARE(NAME##b) DECLARE(NAME##c) \
+DECLARE(NAME##d) DECLARE(NAME##e) DECLARE(NAME##f) DECLARE(NAME##g) \
+DECLARE(NAME##h) DECLARE(NAME##i) DECLARE(NAME##j) 
+#define TYPES2(NAME) TYPES1(NAME##a) TYPES1(NAME##b) TYPES1(NAME##c) \
+TYPES1(NAME##d) TYPES1(NAME##e) TYPES1(NAME##f) TYPES1(NAME##g) \
+TYPES1(NAME##h) TYPES1(NAME##i) TYPES1(NAME##j) 
+#define TYPES3(NAME) TYPES2(NAME##a) TYPES2(NAME##b) TYPES2(NAME##c) \
+TYPES2(NAME##d) TYPES2(NAME##e) TYPES2(NAME##f) TYPES2(NAME##g) \
+TYPES2(NAME##h) TYPES2(NAME##i) TYPES2(NAME##j) 
+#define TYPES4(NAME) TYPES3(NAME##a) TYPES3(NAME##b) TYPES3(NAME##c) \
+TYPES3(NAME##d) TYPES3(NAME##e) TYPES3(NAME##f) TYPES3(NAME##g)
+
+#define DECLARE(NAME) struct NAME {};
+TYPES4(Type)

ilya-biryukov wrote:

I suspected there's no easy trick, thanks for confirming.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

Another idea is to limit (or check) the threshold for 
`ASTReader::NumCurrentElementsDeserializing`. Then we still can print the stack 
trace by some utils in LLVM also we can get better performance than this. 

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread Chuanqi Xu via cfe-commits


@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/usings.cppm -o 
%t/usings.pcm
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=usings=%t/usings.pcm %t/use.cpp 
-verify -fsyntax-only -Wno-stack-exhausted
+
+// expected-no-diagnostics
+
+//--- usings.cppm
+export module usings;
+
+#define TYPES1(NAME) DECLARE(NAME##a) DECLARE(NAME##b) DECLARE(NAME##c) \
+DECLARE(NAME##d) DECLARE(NAME##e) DECLARE(NAME##f) DECLARE(NAME##g) \
+DECLARE(NAME##h) DECLARE(NAME##i) DECLARE(NAME##j) 
+#define TYPES2(NAME) TYPES1(NAME##a) TYPES1(NAME##b) TYPES1(NAME##c) \
+TYPES1(NAME##d) TYPES1(NAME##e) TYPES1(NAME##f) TYPES1(NAME##g) \
+TYPES1(NAME##h) TYPES1(NAME##i) TYPES1(NAME##j) 
+#define TYPES3(NAME) TYPES2(NAME##a) TYPES2(NAME##b) TYPES2(NAME##c) \
+TYPES2(NAME##d) TYPES2(NAME##e) TYPES2(NAME##f) TYPES2(NAME##g) \
+TYPES2(NAME##h) TYPES2(NAME##i) TYPES2(NAME##j) 
+#define TYPES4(NAME) TYPES3(NAME##a) TYPES3(NAME##b) TYPES3(NAME##c) \
+TYPES3(NAME##d) TYPES3(NAME##e) TYPES3(NAME##f) TYPES3(NAME##g)
+
+#define DECLARE(NAME) struct NAME {};
+TYPES4(Type)

ChuanqiXu9 wrote:

> I am not sure if there is an an approach that does not involve structural 
> changes to the AST (i.e. rewriting the ASTReaderDecl somehow, but keeping the 
> AST the same).

To my knowledge, it is only possible by introducing coroutines if we don't want 
to refactor the current deserializer significantly.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 commented:

To make this testable, maybe we can refactor 
`clang::runWithSufficientStackSpace`  a little bit to make `DesiredStackSize` 
and `isStackNearlyExhausted::SufficientStack` configurable.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread Chuanqi Xu via cfe-commits


@@ -4099,7 +4099,9 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
   // calls to Decl::getASTContext() by Decl's methods will find the
   // TranslationUnitDecl without crashing.
   D->setDeclContext(Context.getTranslationUnitDecl());
-  Reader.Visit(D);
+
+  // Reading some declarations can result in deep recursion.
+  SemaObj->runWithSufficientStackSpace(DeclLoc, [&] { Reader.Visit(D); });

ChuanqiXu9 wrote:

Sema may not meaningful in ASTReader (e.g., we're compiling a pcm file). 
```suggestion
clang::runWithSufficientStackSpace(...)
```

Also in this way, we can get better diagnotsics.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 edited 
https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread David Blaikie via cfe-commits

dwblaikie wrote:

Could you show the stack (omitting/annotating the repeated part) that leads to 
failure? and/or the AST shape that leads to failure?

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread via cfe-commits

https://github.com/cor3ntin approved this pull request.

I think it makes sense to make this change without tests

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread Ilya Biryukov via cfe-commits


@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/usings.cppm -o 
%t/usings.pcm
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=usings=%t/usings.pcm %t/use.cpp 
-verify -fsyntax-only -Wno-stack-exhausted
+
+// expected-no-diagnostics
+
+//--- usings.cppm
+export module usings;
+
+#define TYPES1(NAME) DECLARE(NAME##a) DECLARE(NAME##b) DECLARE(NAME##c) \
+DECLARE(NAME##d) DECLARE(NAME##e) DECLARE(NAME##f) DECLARE(NAME##g) \
+DECLARE(NAME##h) DECLARE(NAME##i) DECLARE(NAME##j) 
+#define TYPES2(NAME) TYPES1(NAME##a) TYPES1(NAME##b) TYPES1(NAME##c) \
+TYPES1(NAME##d) TYPES1(NAME##e) TYPES1(NAME##f) TYPES1(NAME##g) \
+TYPES1(NAME##h) TYPES1(NAME##i) TYPES1(NAME##j) 
+#define TYPES3(NAME) TYPES2(NAME##a) TYPES2(NAME##b) TYPES2(NAME##c) \
+TYPES2(NAME##d) TYPES2(NAME##e) TYPES2(NAME##f) TYPES2(NAME##g) \
+TYPES2(NAME##h) TYPES2(NAME##i) TYPES2(NAME##j) 
+#define TYPES4(NAME) TYPES3(NAME##a) TYPES3(NAME##b) TYPES3(NAME##c) \
+TYPES3(NAME##d) TYPES3(NAME##e) TYPES3(NAME##f) TYPES3(NAME##g)
+
+#define DECLARE(NAME) struct NAME {};
+TYPES4(Type)

ilya-biryukov wrote:

The test is purely for illustrative purposes, it actually takes too long to 
run, so I think we should not put it here.
This produces 7k declarations, it was enough to have a stack exhaustion in a 
release build on my local machine.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread Ilya Biryukov via cfe-commits

https://github.com/ilya-biryukov commented:

I think the alternative we could try to chase is turning the chain of 
using-shadow-decls into an array somehow, so we can process it in a loop 
instead of recursion.

I am not sure if there is an an approach that does **not** involve structural 
changes to the AST (i.e. rewriting the `ASTReaderDecl` somehow, but keeping the 
AST the same). Maybe someone more experienced with the serialization could 
suggest something useful here, that would probably be ideal.

https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread Ilya Biryukov via cfe-commits

https://github.com/ilya-biryukov edited 
https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread via cfe-commits

llvmbot wrote:



@llvm/pr-subscribers-clang-modules

@llvm/pr-subscribers-clang

Author: Ilya Biryukov (ilya-biryukov)


Changes

Particular example that lead to this is a very long chain of `UsingShadowDecl`s 
that we hit in our codebase in generated code.

To avoid that, check for stack exhaustion when deserializing the declaration. 
At that point, we can point to source location of a particular declaration that 
is being deserialized.

---
Full diff: https://github.com/llvm/llvm-project/pull/79875.diff


2 Files Affected:

- (modified) clang/lib/Serialization/ASTReaderDecl.cpp (+3-1) 
- (added) clang/test/Modules/lots-of-using-shadow-decls.cpp (+43) 


``diff
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp 
b/clang/lib/Serialization/ASTReaderDecl.cpp
index 547eb77930b4eec..e2507d7c14a15d0 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -4099,7 +4099,9 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
   // calls to Decl::getASTContext() by Decl's methods will find the
   // TranslationUnitDecl without crashing.
   D->setDeclContext(Context.getTranslationUnitDecl());
-  Reader.Visit(D);
+
+  // Reading some declarations can result in deep recursion.
+  SemaObj->runWithSufficientStackSpace(DeclLoc, [&] { Reader.Visit(D); });
 
   // If this declaration is also a declaration context, get the
   // offsets for its tables of lexical and visible declarations.
diff --git a/clang/test/Modules/lots-of-using-shadow-decls.cpp 
b/clang/test/Modules/lots-of-using-shadow-decls.cpp
new file mode 100644
index 000..c3048352842e3f9
--- /dev/null
+++ b/clang/test/Modules/lots-of-using-shadow-decls.cpp
@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/usings.cppm -o 
%t/usings.pcm
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=usings=%t/usings.pcm %t/use.cpp 
-verify -fsyntax-only -Wno-stack-exhausted
+
+// expected-no-diagnostics
+
+//--- usings.cppm
+export module usings;
+
+#define TYPES1(NAME) DECLARE(NAME##a) DECLARE(NAME##b) DECLARE(NAME##c) \
+DECLARE(NAME##d) DECLARE(NAME##e) DECLARE(NAME##f) DECLARE(NAME##g) \
+DECLARE(NAME##h) DECLARE(NAME##i) DECLARE(NAME##j) 
+#define TYPES2(NAME) TYPES1(NAME##a) TYPES1(NAME##b) TYPES1(NAME##c) \
+TYPES1(NAME##d) TYPES1(NAME##e) TYPES1(NAME##f) TYPES1(NAME##g) \
+TYPES1(NAME##h) TYPES1(NAME##i) TYPES1(NAME##j) 
+#define TYPES3(NAME) TYPES2(NAME##a) TYPES2(NAME##b) TYPES2(NAME##c) \
+TYPES2(NAME##d) TYPES2(NAME##e) TYPES2(NAME##f) TYPES2(NAME##g) \
+TYPES2(NAME##h) TYPES2(NAME##i) TYPES2(NAME##j) 
+#define TYPES4(NAME) TYPES3(NAME##a) TYPES3(NAME##b) TYPES3(NAME##c) \
+TYPES3(NAME##d) TYPES3(NAME##e) TYPES3(NAME##f) TYPES3(NAME##g)
+
+#define DECLARE(NAME) struct NAME {};
+TYPES4(Type)
+
+export struct Base {
+#undef DECLARE
+#define DECLARE(NAME) void func(NAME*);
+TYPES4(Type)
+};
+
+export struct Derived : Base {
+using Base::func;
+};
+
+//--- use.cpp
+import usings;
+void test() {
+Derived().func(nullptr); // expected-error{{ambiguous}}
+// expected-note@* + {{candidate function}}
+}

``




https://github.com/llvm/llvm-project/pull/79875
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Check for stack exhaustion when reading declarations (PR #79875)

2024-01-29 Thread Ilya Biryukov via cfe-commits

https://github.com/ilya-biryukov created 
https://github.com/llvm/llvm-project/pull/79875

Particular example that lead to this is a very long chain of `UsingShadowDecl`s 
that we hit in our codebase in generated code.

To avoid that, check for stack exhaustion when deserializing the declaration. 
At that point, we can point to source location of a particular declaration that 
is being deserialized.

>From d1aaa762ae05defa94289afda163c67731fc607d Mon Sep 17 00:00:00 2001
From: Ilya Biryukov 
Date: Mon, 29 Jan 2024 18:55:53 +0100
Subject: [PATCH] [Serialization] Check for stack exhaustion when reading
 declarations

Particular example that lead to this is a very long chain of
`UsingShadowDecl`s that we hit in our codebase in generated code.

To avoid that, check for stack exhaustion when deserializing the
declaration. At that point, we can point to source location of a
particular declaration that is being deserialized.
---
 clang/lib/Serialization/ASTReaderDecl.cpp |  4 +-
 .../Modules/lots-of-using-shadow-decls.cpp| 43 +++
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Modules/lots-of-using-shadow-decls.cpp

diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp 
b/clang/lib/Serialization/ASTReaderDecl.cpp
index 547eb77930b4eec..e2507d7c14a15d0 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -4099,7 +4099,9 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
   // calls to Decl::getASTContext() by Decl's methods will find the
   // TranslationUnitDecl without crashing.
   D->setDeclContext(Context.getTranslationUnitDecl());
-  Reader.Visit(D);
+
+  // Reading some declarations can result in deep recursion.
+  SemaObj->runWithSufficientStackSpace(DeclLoc, [&] { Reader.Visit(D); });
 
   // If this declaration is also a declaration context, get the
   // offsets for its tables of lexical and visible declarations.
diff --git a/clang/test/Modules/lots-of-using-shadow-decls.cpp 
b/clang/test/Modules/lots-of-using-shadow-decls.cpp
new file mode 100644
index 000..c3048352842e3f9
--- /dev/null
+++ b/clang/test/Modules/lots-of-using-shadow-decls.cpp
@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/usings.cppm -o 
%t/usings.pcm
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=usings=%t/usings.pcm %t/use.cpp 
-verify -fsyntax-only -Wno-stack-exhausted
+
+// expected-no-diagnostics
+
+//--- usings.cppm
+export module usings;
+
+#define TYPES1(NAME) DECLARE(NAME##a) DECLARE(NAME##b) DECLARE(NAME##c) \
+DECLARE(NAME##d) DECLARE(NAME##e) DECLARE(NAME##f) DECLARE(NAME##g) \
+DECLARE(NAME##h) DECLARE(NAME##i) DECLARE(NAME##j) 
+#define TYPES2(NAME) TYPES1(NAME##a) TYPES1(NAME##b) TYPES1(NAME##c) \
+TYPES1(NAME##d) TYPES1(NAME##e) TYPES1(NAME##f) TYPES1(NAME##g) \
+TYPES1(NAME##h) TYPES1(NAME##i) TYPES1(NAME##j) 
+#define TYPES3(NAME) TYPES2(NAME##a) TYPES2(NAME##b) TYPES2(NAME##c) \
+TYPES2(NAME##d) TYPES2(NAME##e) TYPES2(NAME##f) TYPES2(NAME##g) \
+TYPES2(NAME##h) TYPES2(NAME##i) TYPES2(NAME##j) 
+#define TYPES4(NAME) TYPES3(NAME##a) TYPES3(NAME##b) TYPES3(NAME##c) \
+TYPES3(NAME##d) TYPES3(NAME##e) TYPES3(NAME##f) TYPES3(NAME##g)
+
+#define DECLARE(NAME) struct NAME {};
+TYPES4(Type)
+
+export struct Base {
+#undef DECLARE
+#define DECLARE(NAME) void func(NAME*);
+TYPES4(Type)
+};
+
+export struct Derived : Base {
+using Base::func;
+};
+
+//--- use.cpp
+import usings;
+void test() {
+Derived().func(nullptr); // expected-error{{ambiguous}}
+// expected-note@* + {{candidate function}}
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits