https://github.com/qiongsiwu updated https://github.com/llvm/llvm-project/pull/183396
>From f3d4a8afbb342afd2b2e24af2368a9a9c9d4fed4 Mon Sep 17 00:00:00 2001 From: Qiongsi Wu <[email protected]> Date: Wed, 25 Feb 2026 13:34:42 -0800 Subject: [PATCH 1/5] Rename the temporary file the compiler creates for by-name dependency scanning. --- .../clang/DependencyScanning/DependencyScanningWorker.h | 2 +- clang/lib/DependencyScanning/DependencyScanningWorker.cpp | 8 ++++---- clang/lib/Tooling/DependencyScanningTool.cpp | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/DependencyScanning/DependencyScanningWorker.h index 9267c25c93ee9..ee67b2e16eb77 100644 --- a/clang/include/clang/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/DependencyScanning/DependencyScanningWorker.h @@ -189,7 +189,7 @@ std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>, std::vector<std::string>> initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS, ArrayRef<std::string> CommandLine, - StringRef WorkingDirectory, StringRef ModuleName); + StringRef WorkingDirectory); } // end namespace dependencies } // end namespace clang diff --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp index 4c24642c437c3..cb9455263d63d 100644 --- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp @@ -112,7 +112,7 @@ bool DependencyScanningWorker::computeDependencies( bool DependencyScanningWorker::initializeCompilerInstanceWithContext( StringRef CWD, ArrayRef<std::string> CommandLine, DiagnosticConsumer &DC) { auto [OverlayFS, ModifiedCommandLine] = - initVFSForByNameScanning(DepFS, CommandLine, CWD, "ScanningByName"); + initVFSForByNameScanning(DepFS, CommandLine, CWD); auto DiagEngineWithCmdAndOpts = std::make_unique<DiagnosticsEngineWithDiagOpts>(ModifiedCommandLine, OverlayFS, DC); @@ -166,8 +166,7 @@ std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>, std::vector<std::string>> dependencies::initVFSForByNameScanning( IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS, - ArrayRef<std::string> CommandLine, StringRef WorkingDirectory, - StringRef ModuleName) { + ArrayRef<std::string> CommandLine, StringRef WorkingDirectory) { // Reset what might have been modified in the previous worker invocation. BaseFS->setCurrentWorkingDirectory(WorkingDirectory); @@ -180,7 +179,8 @@ dependencies::initVFSForByNameScanning( InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); SmallString<128> FakeInputPath; // TODO: We should retry the creation if the path already exists. - llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath, + llvm::sys::fs::createUniquePath("module-includes-%%%%%%%%.input", + FakeInputPath, /*MakeAbsolute=*/false); InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer("")); IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS; diff --git a/clang/lib/Tooling/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanningTool.cpp index 26f07286f18c4..fadef8a27aa2b 100644 --- a/clang/lib/Tooling/DependencyScanningTool.cpp +++ b/clang/lib/Tooling/DependencyScanningTool.cpp @@ -324,8 +324,8 @@ bool DependencyScanningTool::initializeWorkerCIWithContextFromCommandline( // The input command line is either a driver-style command line, or // ill-formed. In this case, we will first call the Driver to build a -cc1 // command line for this compilation or diagnose any ill-formed input. - auto [OverlayFS, ModifiedCommandLine] = initVFSForByNameScanning( - &Worker.getVFS(), CommandLine, CWD, "ScanningByName"); + auto [OverlayFS, ModifiedCommandLine] = + initVFSForByNameScanning(&Worker.getVFS(), CommandLine, CWD); auto DiagEngineWithCmdAndOpts = std::make_unique<DiagnosticsEngineWithDiagOpts>(ModifiedCommandLine, OverlayFS, DC); >From 7ce01879b63a363eef6d44c1db25809a5b5f2e1c Mon Sep 17 00:00:00 2001 From: Qiongsi Wu <[email protected]> Date: Mon, 2 Mar 2026 16:37:30 -0800 Subject: [PATCH 2/5] Fixing potential source location overflow. And reduce the length of the random string since we do not need it that wide. --- .../clang/DependencyScanning/DependencyScannerImpl.h | 10 ++++++++++ .../lib/DependencyScanning/DependencyScannerImpl.cpp | 8 ++++++++ .../DependencyScanning/DependencyScanningWorker.cpp | 12 +++++++++--- .../modules-full-by-mult-mod-names-diagnostics.c | 6 +++--- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/DependencyScanning/DependencyScannerImpl.h b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h index 4210c20942a32..27bf1ce088ac8 100644 --- a/clang/include/clang/DependencyScanning/DependencyScannerImpl.h +++ b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h @@ -154,6 +154,16 @@ class CompilerInstanceWithContext { IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFS); bool computeDependencies(StringRef ModuleName, DependencyConsumer &Consumer, DependencyActionController &Controller); + + // FIXME: see if we can use a smaller fake buffer, or set the buffer's size + // dynamically. + // At the time of this commit, the estimated number of total unique importable + // names is around 3000 from Apple's SDKs. We usually import them in parallel, + // so it is unlikely that all names are all scanned by the same dependency + // scanning worker. Therefore the 64k (20x bigger than our estimate) size is + // sufficient to hold the unique source locations to report diagnostics per + // worker. + static const int32_t FakeInputBufferSize = 1 << 16; }; } // namespace dependencies } // namespace clang diff --git a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp index 3e680f4b4b363..7708477d7abd2 100644 --- a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp +++ b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp @@ -936,6 +936,14 @@ bool CompilerInstanceWithContext::computeDependencies( // FIXME: Scan modules asynchronously here as well. SrcLocOffset++; + if (SrcLocOffset >= FakeInputBufferSize) { + // This should never happen. We could have replaced this by an assert, + // but reporting a fatal error is more explicit, and we know + // immediately we are overflowing. + llvm::report_fatal_error("exceeded maximum by-name scans per worker; " + "increase module-import-by-name.input's size"); + } + SmallVector<IdentifierLoc, 2> Path; IdentifierInfo *ModuleID = PP.getIdentifierInfo(ModuleName); Path.emplace_back(IDLocation, ModuleID); diff --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp index cb9455263d63d..3e89f543b75ce 100644 --- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp @@ -162,6 +162,12 @@ dependencies::initVFSForTUBufferScanning( return std::make_pair(OverlayFS, ModifiedCommandLine); } +// The fake input buffer is read-only, and it is used to produce +// unique source locations for the diagnostics. Therefore sharing +// this global buffer across threads is ok. +static const std::string + FakeInput(" ", CompilerInstanceWithContext::FakeInputBufferSize); + std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>, std::vector<std::string>> dependencies::initVFSForByNameScanning( @@ -179,10 +185,10 @@ dependencies::initVFSForByNameScanning( InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); SmallString<128> FakeInputPath; // TODO: We should retry the creation if the path already exists. - llvm::sys::fs::createUniquePath("module-includes-%%%%%%%%.input", - FakeInputPath, + llvm::sys::fs::createUniquePath("module-import-%%%%.input", FakeInputPath, /*MakeAbsolute=*/false); - InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer("")); + InMemoryFS->addFile(FakeInputPath, 0, + llvm::MemoryBuffer::getMemBuffer(FakeInput)); IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS; OverlayFS->pushOverlay(InMemoryOverlay); diff --git a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c index 9889982354e90..fa93f5fc92fd7 100644 --- a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c +++ b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c @@ -27,11 +27,11 @@ module root2 { header "root2.h" } // RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s // ERROR: Error while scanning dependencies for modA: -// ERROR-NEXT: {{.*}}: fatal error: module 'modA' not found +// ERROR-NEXT: module-import-[[RAND:[a-zA-Z0-9]{4}]].input:1:1: fatal error: module 'modA' not found // ERROR-NEXT: Error while scanning dependencies for modB: -// ERROR-NEXT: {{.*}}: fatal error: module 'modB' not found +// ERROR-NEXT: module-import-[[RAND]].input:1:3: fatal error: module 'modB' not found // ERROR-NEXT: Error while scanning dependencies for modC: -// ERROR-NEXT: {{.*}}: fatal error: module 'modC' not found +// ERROR-NEXT: module-import-[[RAND]].input:1:4: fatal error: module 'modC' not found // CHECK: { // CHECK-NEXT: "modules": [ // CHECK-NEXT: { >From cfc65cd22076bd92b2c7ee219e5cc0381c5a86c9 Mon Sep 17 00:00:00 2001 From: Qiongsi Wu <[email protected]> Date: Tue, 3 Mar 2026 14:33:12 -0800 Subject: [PATCH 3/5] Remove unnecessary unique identifier from the buffer's file name. --- clang/lib/DependencyScanning/DependencyScanningWorker.cpp | 4 +--- .../modules-full-by-mult-mod-names-diagnostics.c | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp index 3e89f543b75ce..56348ac3a6332 100644 --- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp @@ -183,10 +183,8 @@ dependencies::initVFSForByNameScanning( llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS); auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); - SmallString<128> FakeInputPath; + std::string FakeInputPath("module-include.input"); // TODO: We should retry the creation if the path already exists. - llvm::sys::fs::createUniquePath("module-import-%%%%.input", FakeInputPath, - /*MakeAbsolute=*/false); InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(FakeInput)); IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS; diff --git a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c index fa93f5fc92fd7..f99690c3a7715 100644 --- a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c +++ b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c @@ -27,11 +27,11 @@ module root2 { header "root2.h" } // RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s // ERROR: Error while scanning dependencies for modA: -// ERROR-NEXT: module-import-[[RAND:[a-zA-Z0-9]{4}]].input:1:1: fatal error: module 'modA' not found +// ERROR-NEXT: module-include.input:1:1: fatal error: module 'modA' not found // ERROR-NEXT: Error while scanning dependencies for modB: -// ERROR-NEXT: module-import-[[RAND]].input:1:3: fatal error: module 'modB' not found +// ERROR-NEXT: module-include.input:1:3: fatal error: module 'modB' not found // ERROR-NEXT: Error while scanning dependencies for modC: -// ERROR-NEXT: module-import-[[RAND]].input:1:4: fatal error: module 'modC' not found +// ERROR-NEXT: module-include.input:1:4: fatal error: module 'modC' not found // CHECK: { // CHECK-NEXT: "modules": [ // CHECK-NEXT: { >From f1865aa35e69f170fa3292229be54185610dae4d Mon Sep 17 00:00:00 2001 From: Qiongsi Wu <[email protected]> Date: Tue, 3 Mar 2026 16:18:15 -0800 Subject: [PATCH 4/5] Address code review. --- clang/lib/DependencyScanning/DependencyScanningWorker.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp index 56348ac3a6332..59f4ca0048085 100644 --- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp @@ -183,8 +183,7 @@ dependencies::initVFSForByNameScanning( llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS); auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory); - std::string FakeInputPath("module-include.input"); - // TODO: We should retry the creation if the path already exists. + StringRef FakeInputPath("module-include.input"); InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(FakeInput)); IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS; >From 6c12f9a349abdf79cb5e29f502ad8ba95221c964 Mon Sep 17 00:00:00 2001 From: Qiongsi Wu <[email protected]> Date: Wed, 4 Mar 2026 09:14:08 -0800 Subject: [PATCH 5/5] Address review comments. --- .../DependencyScanning/DependencyScannerImpl.h | 18 +++++++++--------- .../DependencyScannerImpl.cpp | 10 +++------- .../DependencyScanningWorker.cpp | 2 +- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/DependencyScanning/DependencyScannerImpl.h b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h index 27bf1ce088ac8..05bef147e6f86 100644 --- a/clang/include/clang/DependencyScanning/DependencyScannerImpl.h +++ b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h @@ -155,15 +155,15 @@ class CompilerInstanceWithContext { bool computeDependencies(StringRef ModuleName, DependencyConsumer &Consumer, DependencyActionController &Controller); - // FIXME: see if we can use a smaller fake buffer, or set the buffer's size - // dynamically. - // At the time of this commit, the estimated number of total unique importable - // names is around 3000 from Apple's SDKs. We usually import them in parallel, - // so it is unlikely that all names are all scanned by the same dependency - // scanning worker. Therefore the 64k (20x bigger than our estimate) size is - // sufficient to hold the unique source locations to report diagnostics per - // worker. - static const int32_t FakeInputBufferSize = 1 << 16; + // MaxNumOfQueries is the upper limit of the number of names the by-name + // scanning API (computeDependencies) can support after a + // CompilerInstanceWithContext is initialized. At the time of this commit, the + // estimated number of total unique importable names is around 3000 from + // Apple's SDKs. We usually import them in parallel, so it is unlikely that + // all names are all scanned by the same dependency scanning worker. Therefore + // the 64k (20x bigger than our estimate) size is sufficient to hold the + // unique source locations to report diagnostics per worker. + static const int32_t MaxNumOfQueries = 1 << 16; }; } // namespace dependencies } // namespace clang diff --git a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp index 7708477d7abd2..2cd4ac6c564b4 100644 --- a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp +++ b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp @@ -869,6 +869,9 @@ bool CompilerInstanceWithContext::initialize( bool CompilerInstanceWithContext::computeDependencies( StringRef ModuleName, DependencyConsumer &Consumer, DependencyActionController &Controller) { + if (SrcLocOffset >= MaxNumOfQueries) + llvm::report_fatal_error("exceeded maximum by-name scans per worker"); + assert(CIPtr && "CIPtr must be initialized before calling this method"); auto &CI = *CIPtr; @@ -936,13 +939,6 @@ bool CompilerInstanceWithContext::computeDependencies( // FIXME: Scan modules asynchronously here as well. SrcLocOffset++; - if (SrcLocOffset >= FakeInputBufferSize) { - // This should never happen. We could have replaced this by an assert, - // but reporting a fatal error is more explicit, and we know - // immediately we are overflowing. - llvm::report_fatal_error("exceeded maximum by-name scans per worker; " - "increase module-import-by-name.input's size"); - } SmallVector<IdentifierLoc, 2> Path; IdentifierInfo *ModuleID = PP.getIdentifierInfo(ModuleName); diff --git a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp index 59f4ca0048085..32662fcf078a1 100644 --- a/clang/lib/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp @@ -166,7 +166,7 @@ dependencies::initVFSForTUBufferScanning( // unique source locations for the diagnostics. Therefore sharing // this global buffer across threads is ok. static const std::string - FakeInput(" ", CompilerInstanceWithContext::FakeInputBufferSize); + FakeInput(" ", CompilerInstanceWithContext::MaxNumOfQueries); std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>, std::vector<std::string>> _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
