https://github.com/Decodetalkers updated 
https://github.com/llvm/llvm-project/pull/200001

>From 706e29d8b9aac8bf866b3e8ac824c7ac452191e3 Mon Sep 17 00:00:00 2001
From: ShootingStarDragons <[email protected]>
Date: Thu, 28 May 2026 00:16:49 +0900
Subject: [PATCH] feat: add gcc scan rules

This makes completion work after the whole project has been compiled with
GCC.
---
 clang-tools-extra/clangd/ModulesBuilder.cpp |  24 +++
 clang-tools-extra/clangd/ProjectModules.cpp | 210 +++++++++++++++++++-
 2 files changed, 232 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp 
b/clang-tools-extra/clangd/ModulesBuilder.cpp
index 706fd459e15ec..14fc987296e6a 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -862,6 +862,7 @@ class CachingProjectModules : public ProjectModules {
   }
 
   std::vector<std::string> getRequiredModules(PathRef File) override {
+    elog("========= get module here ======");
     return MDB->getRequiredModules(File);
   }
 
@@ -873,10 +874,12 @@ class CachingProjectModules : public ProjectModules {
     return MDB->getModuleNameState(ModuleName);
   }
 
+  // NOTE: execution then enters here.
   std::string getSourceForModuleName(llvm::StringRef ModuleName,
                                      PathRef RequiredSrcFile) override {
     auto ModuleState = MDB->getModuleNameState(ModuleName);
 
+    elog("Scan start");
     if (ModuleState == ModuleNameState::Multiple) {
       std::string CachedResult =
           Cache.getMultipleSourceForModuleName(ModuleName, RequiredSrcFile);
@@ -884,6 +887,8 @@ class CachingProjectModules : public ProjectModules {
       // Verify Cached Result by seeing if the source declaring the same module
       // as we query.
       if (!CachedResult.empty()) {
+        // NOTE: MDB is ScanningAllProjectModules
+
         std::string ModuleNameOfCachedSource =
             MDB->getModuleNameForSource(CachedResult);
         if (ModuleNameOfCachedSource == ModuleName)
@@ -1059,6 +1064,7 @@ void ModulesBuilder::ModulesBuilderImpl::
       CacheRoot);
 }
 
+// TODO: add the data here instead of going on to the next step.
 void ModulesBuilder::ModulesBuilderImpl::getPrebuiltModuleFile(
     StringRef ModuleName, PathRef ModuleUnitFileName, const ThreadsafeFS &TFS,
     ReusablePrerequisiteModules &BuiltModuleFiles) {
@@ -1103,14 +1109,20 @@ void 
ModulesBuilder::ModulesBuilderImpl::getPrebuiltModuleFile(
   }
 }
 
+// NOTE: the problem is BuiltModuleFiles.
+// NOTE: this is the first place where scanning starts.
+// FIXME: GCC's .gcm files cannot be used here.
 llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
     PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS,
     CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) 
{
   if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
     return llvm::Error::success();
+  elog("============== scanning starts from here? =============");
 
   std::string ModuleUnitFileName =
       MDB.getSourceForModuleName(ModuleName, RequiredSource);
+
+  elog("====== so we got gcc module source {0}, file is : {1}", ModuleName, 
ModuleUnitFileName);
   /// It is possible that we're meeting third party modules (modules whose
   /// source are not in the project. e.g, the std module may be a third-party
   /// module for most project) or something wrong with the implementation of
@@ -1129,7 +1141,11 @@ llvm::Error 
ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
 
   // Get Required modules in topological order.
   auto ReqModuleNames = getAllRequiredModules(RequiredSource, MDB, ModuleName);
+  // NOTE: it seems we can get the right module name here, and also its
+  // source. Then let's do it.
+  // We should not reuse clangd's logic. This time we can get the Cmd, so we
+  // can also tell whether the compiler is GCC.
   for (llvm::StringRef ReqModuleName : ReqModuleNames) {
+    elog("ReqModuleName: {0}", ReqModuleName.str());
     if (BuiltModuleFiles.isModuleUnitBuilt(ReqModuleName))
       continue;
 
@@ -1144,6 +1160,7 @@ llvm::Error 
ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
       garbageCollectModuleCacheForProjectRoot(PI->SourceRoot);
 
     const std::string CommandHash = getCompileCommandStringHash(*Cmd);
+    // NOTE: this logic is for clang++, so we should never use it.
     const std::string PublishedModuleFilePath = getPublishedModuleFilePath(
         ReqModuleName, getModuleFilesDirectory(ReqFileName, *Cmd, getCDB()));
 
@@ -1219,6 +1236,8 @@ llvm::Error 
ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
 }
 
 bool ModulesBuilder::hasRequiredModules(PathRef File) {
+  // NOTE: the right logic is in DirectoryBasedGlobalCompilationDatabase
+  // Now it is CompoundProjectModules
   std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File);
   if (!MDB)
     return false;
@@ -1228,10 +1247,14 @@ bool ModulesBuilder::hasRequiredModules(PathRef File) {
   return !CachedMDB.getRequiredModules(File).empty();
 }
 
+// NOTE: this is finally the place we were looking for.
+// Maybe we can just create a GCC version of it.
 std::unique_ptr<PrerequisiteModules>
 ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
                                             const ThreadsafeFS &TFS) {
+  // NOTE: then MDB is always the DirectoryBasedGlobalCompilationDatabase
   std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File);
+  elog("Enter here? =====================");
   if (!MDB) {
     elog("Failed to get Project Modules information for {0}", File);
     return std::make_unique<FailedPrerequisiteModules>();
@@ -1244,6 +1267,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
   if (RequiredModuleNames.empty())
     return std::make_unique<ReusablePrerequisiteModules>();
 
+  // NOTE: it seems we need to make a change here.
   auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>();
   for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
     // Return early if there is any error.
diff --git a/clang-tools-extra/clangd/ProjectModules.cpp 
b/clang-tools-extra/clangd/ProjectModules.cpp
index d3727171bff12..4e856999a982a 100644
--- a/clang-tools-extra/clangd/ProjectModules.cpp
+++ b/clang-tools-extra/clangd/ProjectModules.cpp
@@ -12,11 +12,16 @@
 #include "clang/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanningTool.h"
 #include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/TargetParser/Host.h"
 
 namespace clang::clangd {
@@ -171,6 +176,8 @@ class ModuleDependencyScanner {
   /// Scanning the single file specified by \param FilePath.
   std::optional<ModuleDependencyInfo>
   scan(PathRef FilePath, const ProjectModules::CommandMangler &Mangler);
+  /// Reads the module information for \p Cmd from the GCC module-mapper file
+  /// \p MapFile rather than from the clang dependency scanner.
+  /// NOTE(review): \p Cmd is taken by value, so every call copies the compile
+  /// command — confirm that is intended.
+  std::optional<ModuleDependencyInfo> scanGcc(tooling::CompileCommand Cmd,
+                                              PathRef MapFile);
 
   /// Scanning every source file in the current project to get the
   /// <module-name> to <module-unit-source> map.
@@ -209,15 +216,214 @@ class ModuleDependencyScanner {
   llvm::StringMap<std::string> ModuleNameToSource;
 };
 
+namespace gcc {
+// Patterns for the fields of the text that `readelf -p.gnu.c++.README` prints
+// for a GCM file. Captured values may carry a trailing newline; callers trim
+// them. The dot in front of "gcm" is escaped so that only a literal ".gcm"
+// suffix is accepted (an unescaped '.' would match any character).
+static const llvm::Regex ImportRegex =
+    llvm::Regex("import: ([^ ]*) ([^ ]*\\.gcm)");
+static const llvm::Regex ModuleRegex = llvm::Regex("module: ([^ ]*)");
+static const llvm::Regex SourceRegex = llvm::Regex("source: ([^ ]*)");
+static const llvm::Regex CwdRegex = llvm::Regex("cwd: ([^ ]*)");
+
+// One module-mapper entry: `<name> <path>.gcm`. Neither part may contain a
+// space or a newline, so a match can never run across two lines; the name
+// additionally excludes '^' and '$'.
+static const llvm::Regex ModmapRegex =
+    llvm::Regex("([^ ^$\n]*) ([^ \n]*\\.gcm)");
+
+/// One `<name> <path>` entry taken from a GCC module-mapper file.
+struct RoadMapInfo {
+  /// Text of the first column of the entry.
+  std::string Name;
+  /// Path of the `.gcm` file from the second column.
+  std::string Path;
+};
+
+/// Fields extracted from the text readelf prints for a GCM file's
+/// `.gnu.c++.README` section.
+struct ReadElfInfo {
+  /// Value of the `source:` field, joined onto the `cwd:` value when relative.
+  std::string Source;
+  /// Value of the `module:` field.
+  std::string ModuleName;
+  /// Module names from every `import:` field, in order of appearance.
+  std::vector<std::string> Imports;
+
+  /// Parses the readelf text; yields std::nullopt if a mandatory field is
+  /// absent.
+  static std::optional<ReadElfInfo> get(llvm::StringRef Source);
+};
+
+/// Builds a ReadElfInfo from \p Content, the text readelf printed for a GCM
+/// file's `.gnu.c++.README` section.
+///
+/// The `cwd:`, `source:` and `module:` fields are each taken from their
+/// leftmost occurrence; every `import:` field contributes one module name.
+/// A relative `source:` value is joined onto the `cwd:` value.
+///
+/// \returns std::nullopt when `cwd:`, `source:` or `module:` is missing.
+std::optional<ReadElfInfo> ReadElfInfo::get(llvm::StringRef Content) {
+  // First capture group of the leftmost match of Pattern, whitespace-trimmed.
+  auto FirstField =
+      [Content](const llvm::Regex &Pattern) -> std::optional<llvm::StringRef> {
+    llvm::SmallVector<llvm::StringRef, 1> Groups;
+    if (!Pattern.match(Content, &Groups))
+      return std::nullopt;
+    return Groups[1].trim();
+  };
+
+  const std::optional<llvm::StringRef> WorkDir = FirstField(CwdRegex);
+  if (!WorkDir)
+    return std::nullopt;
+  const std::optional<llvm::StringRef> SourceField = FirstField(SourceRegex);
+  if (!SourceField)
+    return std::nullopt;
+  const std::optional<llvm::StringRef> NameField = FirstField(ModuleRegex);
+  if (!NameField)
+    return std::nullopt;
+
+  // Walk the text left to right, resuming right after each `import:` match.
+  std::vector<std::string> ImportedNames;
+  for (llvm::StringRef Rest = Content; !Rest.empty();) {
+    llvm::SmallVector<llvm::StringRef, 2> Groups;
+    if (!ImportRegex.match(Rest, &Groups))
+      break;
+    ImportedNames.push_back(Groups[1].trim().str());
+    Rest = Rest.drop_front(Groups[0].end() - Rest.begin());
+  }
+
+  std::string SourcePath;
+  if (llvm::sys::path::is_absolute(*SourceField)) {
+    SourcePath = SourceField->str();
+  } else {
+    llvm::SmallString<128> Joined = *WorkDir;
+    llvm::sys::path::append(Joined, *SourceField);
+    SourcePath = Joined.str();
+  }
+  return ReadElfInfo{SourcePath, NameField->str(), ImportedNames};
+}
+
+/// Tells whether \p Arg is a `-fmodule-mapper=` option whose value ends in
+/// "modmap".
+///
+/// Takes a StringRef so that using this as a predicate over a command line
+/// does not copy each argument string.
+static bool fitGccModulePath(llvm::StringRef Arg) {
+  return Arg.starts_with("-fmodule-mapper=") && Arg.ends_with("modmap");
+}
+
+/// Extracts the module metadata stored in the GCM file \p GCMPath.
+///
+/// Runs `readelf -p.gnu.c++.README <GCMPath>` with stdout captured in a
+/// temporary file and hands the captured text to ReadElfInfo::get.
+///
+/// \returns std::nullopt if `readelf` cannot be found, the temporary file
+/// cannot be created, the command does not exit with 0 (it is given a 10
+/// second limit), or the output is unreadable, empty or lacks the fields
+/// ReadElfInfo::get requires.
+static std::optional<ReadElfInfo> scanGcm(llvm::StringRef GCMPath) {
+  auto Readelf = llvm::sys::findProgramByName("readelf");
+  if (!Readelf) {
+    elog("Cannot find readelf to inspect {0}: {1}", GCMPath,
+         Readelf.getError().message());
+    return std::nullopt;
+  }
+
+  llvm::SmallString<64> OutputFile;
+  if (std::error_code EC =
+          llvm::sys::fs::createTemporaryFile("readref", "", OutputFile)) {
+    elog("Cannot create a temporary file for readelf output: {0}",
+         EC.message());
+    return std::nullopt;
+  }
+  // Deletes the capture file on every return path below.
+  llvm::FileRemover OutRemover(OutputFile);
+
+  // stdin gets an empty path, stdout the capture file, stderr no redirect.
+  std::optional<llvm::StringRef> Redirects[3] = {
+      /*Stdin*/ {""}, {OutputFile.str()}, {}};
+  std::string ErrorMessage;
+  int Ret = llvm::sys::ExecuteAndWait(
+      *Readelf, {"readelf", "-p.gnu.c++.README", GCMPath}, std::nullopt,
+      Redirects, /*SecondsToWait=*/10, /*MemoryLimit=*/0, &ErrorMessage);
+  if (Ret != 0) {
+    // A GCM that is missing or unreadable ends up here, so keep this quiet.
+    vlog("readelf failed on {0} (exit code {1}): {2}", GCMPath, Ret,
+         ErrorMessage);
+    return std::nullopt;
+  }
+
+  auto Buf = llvm::MemoryBuffer::getFile(OutputFile);
+  if (!Buf)
+    return std::nullopt;
+  llvm::StringRef Text = Buf->get()->getBuffer().trim();
+  if (Text.empty())
+    return std::nullopt;
+  return ReadElfInfo::get(Text);
+}
+
+/// A module name together with the names of the modules it requires.
+/// NOTE(review): nothing in this patch references this struct — confirm it is
+/// needed, otherwise drop it.
+struct ModuleResult {
+  std::optional<std::string> ModuleName;
+  std::vector<std::string> RequiredModules;
+};
+
+} // namespace gcc
+/// Derives the module information of the translation unit described by
+/// \p Cmd from the GCC module-mapper file \p MapFile.
+///
+/// Every `<name> <path>.gcm` entry of \p MapFile is resolved against
+/// `Cmd.Directory` and inspected with gcc::scanGcm; entries whose GCM cannot
+/// be inspected are ignored. Each inspected GCM is recorded in
+/// ModuleNameToSource. The entry whose source is `Cmd.Filename` names the
+/// module this file provides; every other inspected entry is reported as a
+/// required module.
+///
+/// Paths are compared as plain strings after resolving relative ones against
+/// `Cmd.Directory`; no further normalisation (symlinks, "..") is done.
+///
+/// \returns std::nullopt if \p MapFile cannot be read.
+std::optional<ModuleDependencyScanner::ModuleDependencyInfo>
+ModuleDependencyScanner::scanGcc(tooling::CompileCommand Cmd,
+                                 PathRef MapFile) {
+  using namespace gcc;
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File =
+      llvm::MemoryBuffer::getFile(MapFile);
+  if (!File) {
+    elog("File Not Found, {0}", MapFile);
+    return std::nullopt;
+  }
+
+  // Relative paths, both in the mapper file and in the command itself, are
+  // taken relative to the command's working directory.
+  auto Resolve = [&Cmd](llvm::StringRef P) -> std::string {
+    if (llvm::sys::path::is_absolute(P))
+      return P.str();
+    llvm::SmallString<128> Abs(Cmd.Directory);
+    llvm::sys::path::append(Abs, P);
+    return Abs.str().str();
+  };
+  const std::string ThisFile = Resolve(Cmd.Filename);
+
+  std::vector<RoadMapInfo> Entries;
+  llvm::StringRef Text = (*File)->getBuffer();
+  while (!Text.empty()) {
+    llvm::SmallVector<llvm::StringRef, 3> Matches;
+    if (!ModmapRegex.match(Text, &Matches))
+      break;
+    Entries.push_back(
+        RoadMapInfo{Matches[1].trim().str(), Resolve(Matches[2].trim())});
+    // Continue right after the text consumed by this match.
+    Text = Text.drop_front(Matches[0].end() - Text.begin());
+  }
+
+  ModuleDependencyScanner::ModuleDependencyInfo Result;
+  for (const RoadMapInfo &Info : Entries) {
+    std::optional<ReadElfInfo> GCMInfo = scanGcm(Info.Path);
+    if (!GCMInfo)
+      continue;
+
+    ModuleNameToSource.try_emplace(GCMInfo->ModuleName, GCMInfo->Source);
+    if (GCMInfo->Source == ThisFile) {
+      // The mapper also lists the module this file itself produces. That is
+      // the file's own module name, not something it depends on.
+      Result.ModuleName = GCMInfo->ModuleName;
+      continue;
+    }
+    Result.RequiredModules.push_back(Info.Name);
+  }
+  return Result;
+}
+
+// TODO: read this part more closely; the problem appears to be here.
+// We can read the data from the modmap, but we cannot get the required
+// modules from it: an entry can be the module of the file itself.
 std::optional<ModuleDependencyScanner::ModuleDependencyInfo>
 ModuleDependencyScanner::scan(PathRef FilePath,
                               const ProjectModules::CommandMangler &Mangler) {
+  // FIXME: why does it always become clang++? Or is the problem here?
   auto Cmd = getCompileCommandForFile(*CDB, FilePath, Mangler);
-  if (!Cmd)
-    return std::nullopt;
+
+  elog("filepath: {0}, dir: {1}", FilePath, Cmd->Directory);
 
   using namespace clang::tooling;
 
+  auto CmdLine = Cmd->CommandLine;
+  auto It = llvm::find_if(CmdLine, gcc::fitGccModulePath);
+  if (It != CmdLine.end()) {
+    llvm::StringRef Module = *It;
+    // NOTE: we can use it to check the module Name, and its name
+    if (Module.consume_front("-fmodule-mapper=")) {
+      elog("Enter: filepath: {0}, dir: {1}", FilePath, Cmd->Directory);
+      llvm::StringRef Cwd = Cmd->Directory;
+      llvm::SmallString<128> MapFile = Cwd;
+      llvm::sys::path::append(MapFile, Module);
+      return scanGcc(*Cmd, MapFile);
+    }
+  }
   DependencyScanningTool ScanningTool(Service);
 
   std::string S;

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to