https://github.com/yuhaouy updated 
https://github.com/llvm/llvm-project/pull/169560

>From d6a195fea92c992f85d3c9bae3fad379c01f61c1 Mon Sep 17 00:00:00 2001
From: Yu Hao <[email protected]>
Date: Mon, 24 Nov 2025 15:19:46 -0800
Subject: [PATCH 1/2] Add merge range-selector

---
 .../clang/Tooling/Transformer/RangeSelector.h |  4 ++
 clang/lib/Tooling/Transformer/Parsing.cpp     |  2 +-
 .../lib/Tooling/Transformer/RangeSelector.cpp | 50 +++++++++++++++++++
 clang/unittests/Tooling/RangeSelectorTest.cpp | 32 ++++++++++++
 4 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Tooling/Transformer/RangeSelector.h 
b/clang/include/clang/Tooling/Transformer/RangeSelector.h
index 462a9da8f10eb..c76a5106edd65 100644
--- a/clang/include/clang/Tooling/Transformer/RangeSelector.h
+++ b/clang/include/clang/Tooling/Transformer/RangeSelector.h
@@ -37,6 +37,10 @@ RangeSelector enclose(RangeSelector Begin, RangeSelector 
End);
 /// Convenience version of \c range where end-points are bound nodes.
 RangeSelector encloseNodes(std::string BeginID, std::string EndID);
 
+/// Selects the merge of the two ranges, i.e. from min(First.begin,
+/// Second.begin) to max(First.end, Second.end).
+RangeSelector merge(RangeSelector First, RangeSelector Second);
+
 /// DEPRECATED. Use `enclose`.
 inline RangeSelector range(RangeSelector Begin, RangeSelector End) {
   return enclose(std::move(Begin), std::move(End));
diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp 
b/clang/lib/Tooling/Transformer/Parsing.cpp
index 19a1c7c66df46..f7bffda6967a9 100644
--- a/clang/lib/Tooling/Transformer/Parsing.cpp
+++ b/clang/lib/Tooling/Transformer/Parsing.cpp
@@ -108,7 +108,7 @@ getBinaryStringSelectors() {
 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
 getBinaryRangeSelectors() {
   static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
-      M = {{"enclose", enclose}, {"between", between}};
+      M = {{"enclose", enclose}, {"between", between}, {"merge", merge}};
   return M;
 }
 
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp 
b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 54a1590d3106d..948cf9c7e712f 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -178,6 +178,56 @@ RangeSelector transformer::encloseNodes(std::string 
BeginID,
   return transformer::enclose(node(std::move(BeginID)), 
node(std::move(EndID)));
 }
 
+RangeSelector transformer::merge(RangeSelector First, RangeSelector Second) {
+  return [First,
+          Second](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<CharSourceRange> FirstRange = First(Result);
+    if (!FirstRange)
+      return FirstRange.takeError();
+    Expected<CharSourceRange> SecondRange = Second(Result);
+    if (!SecondRange)
+      return SecondRange.takeError();
+    // Result begin loc is the minimum of the begin locs of the two ranges.
+    SourceLocation B = FirstRange->getBegin() < SecondRange->getBegin()
+                           ? FirstRange->getBegin()
+                           : SecondRange->getBegin();
+    if (FirstRange->isTokenRange() && SecondRange->isTokenRange()) {
+      // Both ranges are token ranges. Just take the maximum of their end locs.
+      SourceLocation E = FirstRange->getEnd() > SecondRange->getEnd()
+                             ? FirstRange->getEnd()
+                             : SecondRange->getEnd();
+      return CharSourceRange::getTokenRange(B, E);
+    }
+    SourceLocation FirstE = FirstRange->getEnd();
+    if (FirstRange->isTokenRange()) {
+      // The end of the first range is a token. Need to resolve the token to a
+      // char range.
+      CharSourceRange EndRange = Lexer::makeFileCharRange(
+          CharSourceRange::getTokenRange(FirstRange->getEnd()),
+          *Result.SourceManager, Result.Context->getLangOpts());
+      if (EndRange.isInvalid())
+        return invalidArgumentError(
+            "merge: can't resolve first token range to valid source range");
+      FirstE = EndRange.getEnd();
+    }
+    SourceLocation SecondE = SecondRange->getEnd();
+    if (SecondRange->isTokenRange()) {
+      // The end of the second range is a token. Need to resolve the token to a
+      // char range.
+      CharSourceRange EndRange = Lexer::makeFileCharRange(
+          CharSourceRange::getTokenRange(SecondRange->getEnd()),
+          *Result.SourceManager, Result.Context->getLangOpts());
+      if (EndRange.isInvalid())
+        return invalidArgumentError(
+            "merge: can't resolve second token range to valid source range");
+      SecondE = EndRange.getEnd();
+    }
+    // Result end loc is the maximum of the end locs of the two ranges.
+    SourceLocation E = FirstE > SecondE ? FirstE : SecondE;
+    return CharSourceRange::getCharRange(B, E);
+  };
+}
+
 RangeSelector transformer::member(std::string ID) {
   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp 
b/clang/unittests/Tooling/RangeSelectorTest.cpp
index d441da165b09b..9feca3ca0bed8 100644
--- a/clang/unittests/Tooling/RangeSelectorTest.cpp
+++ b/clang/unittests/Tooling/RangeSelectorTest.cpp
@@ -327,6 +327,38 @@ TEST(RangeSelectorTest, EncloseOpGeneralParsed) {
   EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
 }
 
+TEST(RangeSelectorTest, MergeOp) {
+  StringRef Code = R"cc(
+    int f(int x, int y, int z) { return 3; }
+    int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+  )cc";
+  auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+                          hasArgument(1, expr().bind("a1")),
+                          hasArgument(2, expr().bind("a2")));
+  RangeSelector R = merge(node("a0"), node("a1"));
+  TestMatch Match = matchCode(Code, Matcher);
+  EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7"));
+  R = merge(node("a2"), node("a1"));
+  EXPECT_THAT_EXPECTED(select(R, Match), HasValue("7 /* comment */, 9"));
+}
+
+TEST(RangeSelectorTest, MergeOpParsed) {
+  StringRef Code = R"cc(
+    int f(int x, int y, int z) { return 3; }
+    int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+  )cc";
+  auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+                          hasArgument(1, expr().bind("a1")),
+                          hasArgument(2, expr().bind("a2")));
+  auto R = parseRangeSelector(R"rs(merge(node("a0"), node("a1")))rs");
+  ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+  TestMatch Match = matchCode(Code, Matcher);
+  EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
+  R = parseRangeSelector(R"rs(merge(node("a2"), node("a1")))rs");
+  ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+  EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("7 /* comment */, 9"));
+}
+
 TEST(RangeSelectorTest, NodeOpStatement) {
   StringRef Code = "int f() { return 3; }";
   TestMatch Match = matchCode(Code, returnStmt().bind("id"));

>From a9933031791f121631f00ee5d1836ad8aeb32e1d Mon Sep 17 00:00:00 2001
From: Yu Hao <[email protected]>
Date: Tue, 25 Nov 2025 17:58:34 -0800
Subject: [PATCH 2/2] Fix loc comparision and get end-of-token

---
 .../lib/Tooling/Transformer/RangeSelector.cpp | 45 +++++++++++--------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp 
b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 948cf9c7e712f..68b16f91652fb 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -187,43 +187,50 @@ RangeSelector transformer::merge(RangeSelector First, 
RangeSelector Second) {
     Expected<CharSourceRange> SecondRange = Second(Result);
     if (!SecondRange)
       return SecondRange.takeError();
+
+    SourceLocation FirstB = FirstRange->getBegin();
+    SourceLocation FirstE = FirstRange->getEnd();
+    SourceLocation SecondB = SecondRange->getBegin();
+    SourceLocation SecondE = SecondRange->getEnd();
     // Result begin loc is the minimum of the begin locs of the two ranges.
-    SourceLocation B = FirstRange->getBegin() < SecondRange->getBegin()
-                           ? FirstRange->getBegin()
-                           : SecondRange->getBegin();
+    SourceLocation B =
+        Result.SourceManager->isBeforeInTranslationUnit(FirstB, SecondB)
+            ? FirstB
+            : SecondB;
     if (FirstRange->isTokenRange() && SecondRange->isTokenRange()) {
       // Both ranges are token ranges. Just take the maximum of their end locs.
-      SourceLocation E = FirstRange->getEnd() > SecondRange->getEnd()
-                             ? FirstRange->getEnd()
-                             : SecondRange->getEnd();
+      SourceLocation E =
+          Result.SourceManager->isBeforeInTranslationUnit(FirstE, SecondE)
+              ? SecondE
+              : FirstE;
       return CharSourceRange::getTokenRange(B, E);
     }
-    SourceLocation FirstE = FirstRange->getEnd();
+
     if (FirstRange->isTokenRange()) {
       // The end of the first range is a token. Need to resolve the token to a
       // char range.
-      CharSourceRange EndRange = Lexer::makeFileCharRange(
-          CharSourceRange::getTokenRange(FirstRange->getEnd()),
-          *Result.SourceManager, Result.Context->getLangOpts());
-      if (EndRange.isInvalid())
+      FirstE = Lexer::getLocForEndOfToken(FirstE, /*Offset=*/0,
+                                          *Result.SourceManager,
+                                          Result.Context->getLangOpts());
+      if (FirstE.isInvalid())
         return invalidArgumentError(
             "merge: can't resolve first token range to valid source range");
-      FirstE = EndRange.getEnd();
     }
-    SourceLocation SecondE = SecondRange->getEnd();
     if (SecondRange->isTokenRange()) {
       // The end of the second range is a token. Need to resolve the token to a
       // char range.
-      CharSourceRange EndRange = Lexer::makeFileCharRange(
-          CharSourceRange::getTokenRange(SecondRange->getEnd()),
-          *Result.SourceManager, Result.Context->getLangOpts());
-      if (EndRange.isInvalid())
+      SecondE = Lexer::getLocForEndOfToken(SecondE, /*Offset=*/0,
+                                           *Result.SourceManager,
+                                           Result.Context->getLangOpts());
+      if (SecondE.isInvalid())
         return invalidArgumentError(
             "merge: can't resolve second token range to valid source range");
-      SecondE = EndRange.getEnd();
     }
     // Result end loc is the maximum of the end locs of the two ranges.
-    SourceLocation E = FirstE > SecondE ? FirstE : SecondE;
+    SourceLocation E =
+        Result.SourceManager->isBeforeInTranslationUnit(FirstE, SecondE)
+            ? SecondE
+            : FirstE;
     return CharSourceRange::getCharRange(B, E);
   };
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to