[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
CarlosAlbertoEnciso added a comment. In https://reviews.llvm.org/D48441#1151889, @ioeric wrote: > Hi Carlos, thanks for letting us know! I sent r336249 to fix the windows > test. Hi @ioeric, Thanks very much. Happy to help. Repository: rL LLVM https://reviews.llvm.org/D48441 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
ioeric added a subscriber: sammccall. ioeric added a comment. Hi Carlos, thanks for letting us know! I sent r336249 to fix the windows test. Repository: rL LLVM https://reviews.llvm.org/D48441 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
Hi Carlos, thanks for letting us know! I sent r336249 to fix the windows test. On Wed, Jul 4, 2018 at 9:51 AM Carlos Alberto Enciso via Phabricator < revi...@reviews.llvm.org> wrote: > CarlosAlbertoEnciso added a comment. > > Hi, > > It seems that your patch is causing an error in the tests for > > http://lab.llvm.org:8011/builders/clang-x64-ninja-win7 > > > http://lab.llvm.org:8011/builders/clang-x64-ninja-win7/builds/11510/steps/ninja%20check%201/logs/FAIL%3A%20Extra%20Tools%20Unit%20Tests%3A%3AFileDistanceTests.URI > > Thanks very much > > > Repository: > rL LLVM > > https://reviews.llvm.org/D48441 > > > > ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
CarlosAlbertoEnciso added a comment. Hi, It seems that your patch is causing an error in the tests for http://lab.llvm.org:8011/builders/clang-x64-ninja-win7 http://lab.llvm.org:8011/builders/clang-x64-ninja-win7/builds/11510/steps/ninja%20check%201/logs/FAIL%3A%20Extra%20Tools%20Unit%20Tests%3A%3AFileDistanceTests.URI Thanks very much Repository: rL LLVM https://reviews.llvm.org/D48441 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
This revision was automatically updated to reflect the committed changes. Closed by commit rL336177: [clangd] Incorporate transitive #includes into code complete proximity scoring. (authored by sammccall, committed by ). Herald added a subscriber: llvm-commits. Repository: rL LLVM https://reviews.llvm.org/D48441 Files: clang-tools-extra/trunk/clangd/CMakeLists.txt clang-tools-extra/trunk/clangd/ClangdServer.cpp clang-tools-extra/trunk/clangd/ClangdUnit.cpp clang-tools-extra/trunk/clangd/ClangdUnit.h clang-tools-extra/trunk/clangd/CodeComplete.cpp clang-tools-extra/trunk/clangd/CodeComplete.h clang-tools-extra/trunk/clangd/FileDistance.cpp clang-tools-extra/trunk/clangd/FileDistance.h clang-tools-extra/trunk/clangd/Headers.cpp clang-tools-extra/trunk/clangd/Headers.h clang-tools-extra/trunk/clangd/Quality.cpp clang-tools-extra/trunk/clangd/Quality.h clang-tools-extra/trunk/clangd/XRefs.cpp clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt clang-tools-extra/trunk/unittests/clangd/FileDistanceTests.cpp clang-tools-extra/trunk/unittests/clangd/HeadersTests.cpp clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp Index: clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp === --- clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp @@ -17,6 +17,7 @@ // //===--===// +#include "FileDistance.h" #include "Quality.h" #include "TestFS.h" #include "TestTU.h" @@ -162,9 +163,22 @@ PoorNameMatch.NameMatch = 0.2f; EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); - SymbolRelevanceSignals WithProximity; - WithProximity.SemaProximityScore = 0.2f; - EXPECT_GT(WithProximity.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithSemaProximity; + WithSemaProximity.SemaProximityScore = 0.2f; + EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate()); + + SymbolRelevanceSignals IndexProximate; + IndexProximate.SymbolURI = "unittest:/foo/bar.h"; + llvm::StringMap ProxSources; + ProxSources.try_emplace(testPath("foo/baz.h")); + URIDistance Distance(ProxSources); + IndexProximate.FileProximityMatch = + EXPECT_GT(IndexProximate.evaluate(), Default.evaluate()); + SymbolRelevanceSignals IndexDistant = IndexProximate; + IndexDistant.SymbolURI = "unittest:/elsewhere/path.h"; + EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate()) + << IndexProximate << IndexDistant; + EXPECT_GT(IndexDistant.evaluate(), Default.evaluate()); SymbolRelevanceSignals Scoped; Scoped.Scope = SymbolRelevanceSignals::FileScope; @@ -185,59 +199,6 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } -// {a,b,c} becomes /clangd-test/a/b/c -std::string joinPaths(llvm::ArrayRef Parts) { - return testPath( - llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); -} - -static constexpr float ProximityBase = 0.7f; - -// Calculates a proximity score for an index symbol with declaration file -// SymPath with the given URI scheme. -float URIProximity(const FileProximityMatcher , StringRef SymPath, - StringRef Scheme = "file") { - auto U = URI::create(SymPath, Scheme); - EXPECT_TRUE(static_cast(U)) << llvm::toString(U.takeError()); - return Matcher.uriProximity(U->toString()); -} - -TEST(QualityTests, URIProximityScores) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})}); - - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})), - ProximityBase); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})), - std::pow(ProximityBase, 2)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})), - std::pow(ProximityBase, 6)); - // Note the common directory is /clang-test/ - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})), - std::pow(ProximityBase, 7)); -} - -TEST(QualityTests, URIProximityScoresWithTestURI) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"b", "c", "x"})}); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"), - std::pow(ProximityBase, 2)); - // unittest:///b/c/x vs unittest:///m/n/y. No common directory. - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m",
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
sammccall added a comment. Performance on a transitively big file. Stress test - global scope completion with index turned off. Benchmarking codeComplete() call 100x per trial, trials randomly ordered. Before this patch (427ms 421ms 430ms) After this patch (418ms 420ms 417ms) So it seems this is slightly faster, maybe 2%. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
sammccall updated this revision to Diff 153867. sammccall added a comment. Rebase Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 Files: clangd/CMakeLists.txt clangd/ClangdServer.cpp clangd/ClangdUnit.cpp clangd/ClangdUnit.h clangd/CodeComplete.cpp clangd/CodeComplete.h clangd/FileDistance.cpp clangd/FileDistance.h clangd/Headers.cpp clangd/Headers.h clangd/Quality.cpp clangd/Quality.h clangd/XRefs.cpp unittests/clangd/CMakeLists.txt unittests/clangd/FileDistanceTests.cpp unittests/clangd/HeadersTests.cpp unittests/clangd/QualityTests.cpp Index: unittests/clangd/QualityTests.cpp === --- unittests/clangd/QualityTests.cpp +++ unittests/clangd/QualityTests.cpp @@ -17,6 +17,7 @@ // //===--===// +#include "FileDistance.h" #include "Quality.h" #include "TestFS.h" #include "TestTU.h" @@ -162,9 +163,22 @@ PoorNameMatch.NameMatch = 0.2f; EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); - SymbolRelevanceSignals WithProximity; - WithProximity.SemaProximityScore = 0.2f; - EXPECT_GT(WithProximity.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithSemaProximity; + WithSemaProximity.SemaProximityScore = 0.2f; + EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate()); + + SymbolRelevanceSignals IndexProximate; + IndexProximate.SymbolURI = "unittest:/foo/bar.h"; + llvm::StringMap ProxSources; + ProxSources.try_emplace(testPath("foo/baz.h")); + URIDistance Distance(ProxSources); + IndexProximate.FileProximityMatch = + EXPECT_GT(IndexProximate.evaluate(), Default.evaluate()); + SymbolRelevanceSignals IndexDistant = IndexProximate; + IndexDistant.SymbolURI = "unittest:/elsewhere/path.h"; + EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate()) + << IndexProximate << IndexDistant; + EXPECT_GT(IndexDistant.evaluate(), Default.evaluate()); SymbolRelevanceSignals Scoped; Scoped.Scope = SymbolRelevanceSignals::FileScope; @@ -185,59 +199,6 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } -// {a,b,c} becomes /clangd-test/a/b/c -std::string joinPaths(llvm::ArrayRef Parts) { - return testPath( - llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); -} - -static constexpr float ProximityBase = 0.7f; - -// Calculates a proximity score for an index symbol with declaration file -// SymPath with the given URI scheme. -float URIProximity(const FileProximityMatcher , StringRef SymPath, - StringRef Scheme = "file") { - auto U = URI::create(SymPath, Scheme); - EXPECT_TRUE(static_cast(U)) << llvm::toString(U.takeError()); - return Matcher.uriProximity(U->toString()); -} - -TEST(QualityTests, URIProximityScores) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})}); - - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})), - ProximityBase); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})), - std::pow(ProximityBase, 2)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})), - std::pow(ProximityBase, 6)); - // Note the common directory is /clang-test/ - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})), - std::pow(ProximityBase, 7)); -} - -TEST(QualityTests, URIProximityScoresWithTestURI) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"b", "c", "x"})}); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"), - std::pow(ProximityBase, 2)); - // unittest:///b/c/x vs unittest:///m/n/y. No common directory. - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "y"}), "unittest"), - std::pow(ProximityBase, 4)); -} - } // namespace } // namespace clangd } // namespace clang Index: unittests/clangd/HeadersTests.cpp === --- unittests/clangd/HeadersTests.cpp +++ unittests/clangd/HeadersTests.cpp @@ -64,18 +64,17 @@ } protected: - std::vector collectIncludes() { + IncludeStructure collectIncludes() { auto Clang = setupClang(); PreprocessOnlyAction Action; EXPECT_TRUE( Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])); -std::vector Inclusions; -
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
sammccall marked an inline comment as done. sammccall added a comment. Sigh... sorry, forgot to send these comments with the last patch. Will measure performance today. Comment at: clangd/FileDistance.cpp:9 +//===--===// +// +// ioeric wrote: > Is this intentionally reserved? Oops, added implementation docs Comment at: clangd/FileDistance.cpp:51 +for (StringRef Rest = Canonical; !Rest.empty();) { + Rest = parent_path(Rest, sys::path::Style::posix); + auto NextHash = hash_value(Rest); ioeric wrote: > nit: this probably deserve a comment or a better formatting. It looks like > part of the loop incremental. Moved out of the loop, now the loop has a counter. Hopefully less confusing? Comment at: clangd/FileDistance.cpp:86 + auto Canonical = canonicalize(Path); + unsigned Cost = kUnreachable; + SmallVector Ancestors; ioeric wrote: > Should we also check the cache here? I'm not sure why - for the full path we'll look up the cached value in the first iteration through the loop. Ancestors will be empty, so this function should be pretty cheap for this happy path - dominated by computing the hash. or do you mean to skip canonicalization? This doesn't matter in practice as we'll use this through URIDistance which does a pre-canonicalization cache. If we used this directly then it might be worthwhile. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
ioeric accepted this revision. ioeric added a comment. This revision is now accepted and ready to land. lgtm. Again, we might still want to measure the performance impact on files with potentially large #include tree ;) Comment at: clangd/FileDistance.cpp:26 +// /bar/foo = 1 +///foo = 1 +// /foo/bar = 2 Shouldn't /foo a down from / and `2+1 = 3`? Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
sammccall updated this revision to Diff 153504. sammccall marked 2 inline comments as done. sammccall added a comment. Address review comments. Most notably: limit the up-traversals from some sources. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 Files: clangd/CMakeLists.txt clangd/ClangdServer.cpp clangd/ClangdUnit.cpp clangd/ClangdUnit.h clangd/CodeComplete.cpp clangd/CodeComplete.h clangd/FileDistance.cpp clangd/FileDistance.h clangd/Headers.cpp clangd/Headers.h clangd/Quality.cpp clangd/Quality.h clangd/XRefs.cpp unittests/clangd/CMakeLists.txt unittests/clangd/FileDistanceTests.cpp unittests/clangd/HeadersTests.cpp unittests/clangd/QualityTests.cpp Index: unittests/clangd/QualityTests.cpp === --- unittests/clangd/QualityTests.cpp +++ unittests/clangd/QualityTests.cpp @@ -17,6 +17,7 @@ // //===--===// +#include "FileDistance.h" #include "Quality.h" #include "TestFS.h" #include "TestTU.h" @@ -157,9 +158,22 @@ PoorNameMatch.NameMatch = 0.2f; EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); - SymbolRelevanceSignals WithProximity; - WithProximity.SemaProximityScore = 0.2f; - EXPECT_GT(WithProximity.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithSemaProximity; + WithSemaProximity.SemaProximityScore = 0.2f; + EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate()); + + SymbolRelevanceSignals IndexProximate; + IndexProximate.SymbolURI = "unittest:/foo/bar.h"; + llvm::StringMap ProxSources; + ProxSources.try_emplace(testPath("foo/baz.h")); + URIDistance Distance(ProxSources); + IndexProximate.FileProximityMatch = + EXPECT_GT(IndexProximate.evaluate(), Default.evaluate()); + SymbolRelevanceSignals IndexDistant = IndexProximate; + IndexDistant.SymbolURI = "unittest:/elsewhere/path.h"; + EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate()) + << IndexProximate << IndexDistant; + EXPECT_GT(IndexDistant.evaluate(), Default.evaluate()); SymbolRelevanceSignals Scoped; Scoped.Scope = SymbolRelevanceSignals::FileScope; @@ -180,59 +194,6 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } -// {a,b,c} becomes /clangd-test/a/b/c -std::string joinPaths(llvm::ArrayRef Parts) { - return testPath( - llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); -} - -static constexpr float ProximityBase = 0.7; - -// Calculates a proximity score for an index symbol with declaration file -// SymPath with the given URI scheme. -float URIProximity(const FileProximityMatcher , StringRef SymPath, - StringRef Scheme = "file") { - auto U = URI::create(SymPath, Scheme); - EXPECT_TRUE(static_cast(U)) << llvm::toString(U.takeError()); - return Matcher.uriProximity(U->toString()); -} - -TEST(QualityTests, URIProximityScores) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})}); - - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})), - ProximityBase); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})), - std::pow(ProximityBase, 2)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})), - std::pow(ProximityBase, 6)); - // Note the common directory is /clang-test/ - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})), - std::pow(ProximityBase, 7)); -} - -TEST(QualityTests, URIProximityScoresWithTestURI) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"b", "c", "x"})}); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"), - std::pow(ProximityBase, 2)); - // unittest:///b/c/x vs unittest:///m/n/y. No common directory. - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "y"}), "unittest"), - std::pow(ProximityBase, 4)); -} - } // namespace } // namespace clangd } // namespace clang Index: unittests/clangd/HeadersTests.cpp === --- unittests/clangd/HeadersTests.cpp +++ unittests/clangd/HeadersTests.cpp @@ -64,18 +64,17 @@ } protected: - std::vector collectIncludes() { + IncludeStructure collectIncludes() { auto Clang = setupClang(); PreprocessOnlyAction Action; EXPECT_TRUE(
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
ioeric added inline comments. Comment at: clangd/FileDistance.cpp:9 +//===--===// +// +// Is this intentionally reserved? Comment at: clangd/FileDistance.cpp:51 +for (StringRef Rest = Canonical; !Rest.empty();) { + Rest = parent_path(Rest, sys::path::Style::posix); + auto NextHash = hash_value(Rest); nit: this probably deserve a comment or a better formatting. It looks like part of the loop incremental. Comment at: clangd/FileDistance.cpp:86 + auto Canonical = canonicalize(Path); + unsigned Cost = kUnreachable; + SmallVector Ancestors; Should we also check the cache here? Comment at: unittests/clangd/FileDistanceTests.cpp:36 + // Ancestor (up+up+up+up) + EXPECT_EQ(D.distance("/"), 22u); + // Sibling (up+down) sammccall wrote: > ioeric wrote: > > I think this is an interesting case. IIUC, 22u is contributed by 4 ups > > (4*5) and 1 include (1*2, via `llvm/ADT/StringRef.h`). > > > > Suppose `a/b/c/d/e/f.cc` includes `base/x.h`, then the shortest path into > > directory `other/` is likely to go via `base/x.h`. This might become a > > problem if `base/x.h` is very commonly used. One solution is probably to > > penalize edits that hit the root. > Yeah, this could be a weakness. I don't think this is specific to #includes, > or to root directories. > I think the generalization is that there exist directories (like > /usr/include) whose siblings are unrelated. Down-traversals in these > directories should be more expensive. I've added a Caveats section to the > FileDistance documentation, but I would like to avoid adding special cases > until we can see how much difference they make. As chatted offline, this could be still concerning as it's not uncommon for files to include headers from top-level directories. I think the idea you proposed - restricting up traversals from included files, could address the problem. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
sammccall added inline comments. Comment at: clangd/FileDistance.cpp:35 +: Opts(Opts) { + for (const auto : Roots) { +auto Canonical = canonicalize(R.getKey()); Note there was a nasty bug in FileDistance::FileDistance that failed to consider down edges. Fixed and test case added. Comment at: clangd/FileDistance.cpp:86 +return R.first->getSecond(); + if (auto U = clangd::URI::parse(URI)) { +LLVM_DEBUG(dbgs() << "distance(" << URI << ") = distance(" << U->body() ioeric wrote: > This is done for every index symbol, so I wonder if we could avoid parsing > URIs by assuming some URI structures. (note we cache the URI string, so it's at most once per file) We could, at the cost of potentially subtle bugs/dependencies. (what if the data doesn't make exactly the same choices regarding escaping?) parse() is actually *fairly* cheap. Unlike resolve() or create(), it doesn't touch the URI scheme plugins at all, so nothing's virtual. It's a few calls to percentDecode() and a few string allocations. I think there's room for optimization there (scheme should be a smallstring for one) but wouldn't go hacking around it unless it turns up on a profile. Comment at: clangd/FileDistance.h:56 + + FileDistance(llvm::StringMap Roots, + const FileDistanceOptions = {}); ioeric wrote: > nit: It's unclear what the values of Roots are (it can be confused with tree > roots or directory roots). Changed roots -> sources everywhere, which is less ambiguous. Comment at: clangd/Headers.h:62 + std::vector MainFileIncludes; + llvm::StringMap includeDepth(llvm::StringRef MainFileName) const; + ioeric wrote: > Does this return all transitive includes in the entire TU or just those from > `MainFileName`? And what's `MainFileName` here? Is it the same as the main > file in a TU? It's transitive... there was actually a comment but it was in the wrong place! Wrote a better comment and generalized slightly to make this seem less magic. You can actually ask for the files reachable starting at any file (MainFile is usually the one you want though). Comment at: clangd/Headers.h:67 + llvm::StringRef IncludedName, + llvm::StringRef IncludedRealName); + ioeric wrote: > The real name can be empty. How do we handle empty path? We attempt to record it every time we see it. Fixed includeDepth() to exclude files from the output if we never get a real name for them. This shouldn't be the case in practice - real name is missing for FileEntry when using a preamble, but we should see their names when *building* the preamble. Comment at: unittests/clangd/FileDistanceTests.cpp:36 + // Ancestor (up+up+up+up) + EXPECT_EQ(D.distance("/"), 22u); + // Sibling (up+down) ioeric wrote: > I think this is an interesting case. IIUC, 22u is contributed by 4 ups (4*5) > and 1 include (1*2, via `llvm/ADT/StringRef.h`). > > Suppose `a/b/c/d/e/f.cc` includes `base/x.h`, then the shortest path into > directory `other/` is likely to go via `base/x.h`. This might become a > problem if `base/x.h` is very commonly used. One solution is probably to > penalize edits that hit the root. Yeah, this could be a weakness. I don't think this is specific to #includes, or to root directories. I think the generalization is that there exist directories (like /usr/include) whose siblings are unrelated. Down-traversals in these directories should be more expensive. I've added a Caveats section to the FileDistance documentation, but I would like to avoid adding special cases until we can see how much difference they make. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
sammccall updated this revision to Diff 152479. sammccall marked 9 inline comments as done. sammccall added a comment. Address review comments. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 Files: clangd/CMakeLists.txt clangd/ClangdServer.cpp clangd/ClangdUnit.cpp clangd/ClangdUnit.h clangd/CodeComplete.cpp clangd/CodeComplete.h clangd/FileDistance.cpp clangd/FileDistance.h clangd/Headers.cpp clangd/Headers.h clangd/Quality.cpp clangd/Quality.h clangd/XRefs.cpp unittests/clangd/CMakeLists.txt unittests/clangd/FileDistanceTests.cpp unittests/clangd/HeadersTests.cpp unittests/clangd/QualityTests.cpp Index: unittests/clangd/QualityTests.cpp === --- unittests/clangd/QualityTests.cpp +++ unittests/clangd/QualityTests.cpp @@ -17,6 +17,7 @@ // //===--===// +#include "FileDistance.h" #include "Quality.h" #include "TestFS.h" #include "TestTU.h" @@ -157,9 +158,22 @@ PoorNameMatch.NameMatch = 0.2f; EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); - SymbolRelevanceSignals WithProximity; - WithProximity.SemaProximityScore = 0.2f; - EXPECT_GT(WithProximity.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithSemaProximity; + WithSemaProximity.SemaProximityScore = 0.2f; + EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate()); + + SymbolRelevanceSignals IndexProximate; + IndexProximate.SymbolURI = "unittest:/foo/bar.h"; + llvm::StringMap ProxRoots; + ProxRoots[testPath("foo/baz.h")] = 0; + URIDistance Distance(ProxRoots); + IndexProximate.FileProximityMatch = + EXPECT_GT(IndexProximate.evaluate(), Default.evaluate()); + SymbolRelevanceSignals IndexDistant = IndexProximate; + IndexDistant.SymbolURI = "unittest:/elsewhere/path.h"; + EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate()) + << IndexProximate << IndexDistant; + EXPECT_GT(IndexDistant.evaluate(), Default.evaluate()); SymbolRelevanceSignals Scoped; Scoped.Scope = SymbolRelevanceSignals::FileScope; @@ -180,59 +194,6 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } -// {a,b,c} becomes /clangd-test/a/b/c -std::string joinPaths(llvm::ArrayRef Parts) { - return testPath( - llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); -} - -static constexpr float ProximityBase = 0.7; - -// Calculates a proximity score for an index symbol with declaration file -// SymPath with the given URI scheme. -float URIProximity(const FileProximityMatcher , StringRef SymPath, - StringRef Scheme = "file") { - auto U = URI::create(SymPath, Scheme); - EXPECT_TRUE(static_cast(U)) << llvm::toString(U.takeError()); - return Matcher.uriProximity(U->toString()); -} - -TEST(QualityTests, URIProximityScores) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})}); - - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})), - ProximityBase); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})), - std::pow(ProximityBase, 2)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})), - std::pow(ProximityBase, 6)); - // Note the common directory is /clang-test/ - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})), - std::pow(ProximityBase, 7)); -} - -TEST(QualityTests, URIProximityScoresWithTestURI) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"b", "c", "x"})}); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"), - std::pow(ProximityBase, 2)); - // unittest:///b/c/x vs unittest:///m/n/y. No common directory. - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "y"}), "unittest"), - std::pow(ProximityBase, 4)); -} - } // namespace } // namespace clangd } // namespace clang Index: unittests/clangd/HeadersTests.cpp === --- unittests/clangd/HeadersTests.cpp +++ unittests/clangd/HeadersTests.cpp @@ -64,18 +64,17 @@ } protected: - std::vector collectIncludes() { + IncludeStructure collectIncludes() { auto Clang = setupClang(); PreprocessOnlyAction Action; EXPECT_TRUE( Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]));
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
ioeric added a comment. Looks great! Thanks for doing this! The algorithm looks pretty efficient. It might still make sense to collect some performance numbers for real files with potentially large #include tree (we have plenty of those in our internal codebase :) Comment at: clangd/CodeComplete.cpp:908 + llvm::Optional Inserter; // Available during runWithSema. + llvm::Optional FileProximity; // Initialized once Sema runs. It might make sense to briefly explain somewhere how computational cost is distributed across the workflow. Comment at: clangd/CodeComplete.cpp:938 + SemaCCInput.FileName, SemaCCInput.Contents, + format::getLLVMStyle(), // XXX + SemaCCInput.Command.Directory, nit: `Style`? Comment at: clangd/FileDistance.cpp:72 + // Fill in all ancestors between the query and the known. + for (unsigned I = 0; I < Ancestors.size(); ++I) { +if (Cost != kUnreachable) nit: this loop would probably be easier to understand if iterated reversely. Comment at: clangd/FileDistance.cpp:86 +return R.first->getSecond(); + if (auto U = clangd::URI::parse(URI)) { +LLVM_DEBUG(dbgs() << "distance(" << URI << ") = distance(" << U->body() This is done for every index symbol, so I wonder if we could avoid parsing URIs by assuming some URI structures. Comment at: clangd/FileDistance.h:56 + + FileDistance(llvm::StringMap Roots, + const FileDistanceOptions = {}); nit: It's unclear what the values of Roots are (it can be confused with tree roots or directory roots). Comment at: clangd/Headers.cpp:28 + : SM(SM), Out(Out) { +SM.getMainFileID(); + } What does this do? Comment at: clangd/Headers.h:62 + std::vector MainFileIncludes; + llvm::StringMap includeDepth(llvm::StringRef MainFileName) const; + Does this return all transitive includes in the entire TU or just those from `MainFileName`? And what's `MainFileName` here? Is it the same as the main file in a TU? Comment at: clangd/Headers.h:67 + llvm::StringRef IncludedName, + llvm::StringRef IncludedRealName); + The real name can be empty. How do we handle empty path? Comment at: clangd/Headers.h:75 + // The paths we want to expose are the RealPathName, so store those too. + unsigned fileNumber(llvm::StringRef Name); + llvm::StringMap NameToIndex; // Values are file numbers. Maybe `fileIndex`? Comment at: clangd/Quality.cpp:233 +std::pair proximityScore(llvm::StringRef SymbolURI, + URIDistance *D) { nit: `static` Comment at: unittests/clangd/FileDistanceTests.cpp:36 + // Ancestor (up+up+up+up) + EXPECT_EQ(D.distance("/"), 22u); + // Sibling (up+down) I think this is an interesting case. IIUC, 22u is contributed by 4 ups (4*5) and 1 include (1*2, via `llvm/ADT/StringRef.h`). Suppose `a/b/c/d/e/f.cc` includes `base/x.h`, then the shortest path into directory `other/` is likely to go via `base/x.h`. This might become a problem if `base/x.h` is very commonly used. One solution is probably to penalize edits that hit the root. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
sammccall updated this revision to Diff 152326. sammccall added a comment. Document some more stuff, and clang-format Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 Files: clangd/CMakeLists.txt clangd/ClangdServer.cpp clangd/ClangdUnit.cpp clangd/ClangdUnit.h clangd/CodeComplete.cpp clangd/CodeComplete.h clangd/FileDistance.cpp clangd/FileDistance.h clangd/Headers.cpp clangd/Headers.h clangd/Quality.cpp clangd/Quality.h clangd/XRefs.cpp unittests/clangd/CMakeLists.txt unittests/clangd/FileDistanceTests.cpp unittests/clangd/HeadersTests.cpp unittests/clangd/QualityTests.cpp Index: unittests/clangd/QualityTests.cpp === --- unittests/clangd/QualityTests.cpp +++ unittests/clangd/QualityTests.cpp @@ -17,6 +17,7 @@ // //===--===// +#include "FileDistance.h" #include "Quality.h" #include "TestFS.h" #include "TestTU.h" @@ -157,9 +158,22 @@ PoorNameMatch.NameMatch = 0.2f; EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); - SymbolRelevanceSignals WithProximity; - WithProximity.SemaProximityScore = 0.2f; - EXPECT_GT(WithProximity.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithSemaProximity; + WithSemaProximity.SemaProximityScore = 0.2f; + EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate()); + + SymbolRelevanceSignals IndexProximate; + IndexProximate.SymbolURI = "unittest:/foo/bar.h"; + llvm::StringMap ProxRoots; + ProxRoots[testPath("foo/baz.h")] = 0; + URIDistance Distance(ProxRoots); + IndexProximate.FileProximityMatch = + EXPECT_GT(IndexProximate.evaluate(), Default.evaluate()); + SymbolRelevanceSignals IndexDistant = IndexProximate; + IndexDistant.SymbolURI = "unittest:/elsewhere/path.h"; + EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate()) + << IndexProximate << IndexDistant; + EXPECT_GT(IndexDistant.evaluate(), Default.evaluate()); SymbolRelevanceSignals Scoped; Scoped.Scope = SymbolRelevanceSignals::FileScope; @@ -180,59 +194,6 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } -// {a,b,c} becomes /clangd-test/a/b/c -std::string joinPaths(llvm::ArrayRef Parts) { - return testPath( - llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); -} - -static constexpr float ProximityBase = 0.7; - -// Calculates a proximity score for an index symbol with declaration file -// SymPath with the given URI scheme. -float URIProximity(const FileProximityMatcher , StringRef SymPath, - StringRef Scheme = "file") { - auto U = URI::create(SymPath, Scheme); - EXPECT_TRUE(static_cast(U)) << llvm::toString(U.takeError()); - return Matcher.uriProximity(U->toString()); -} - -TEST(QualityTests, URIProximityScores) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})}); - - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})), - ProximityBase); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})), - std::pow(ProximityBase, 2)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})), - std::pow(ProximityBase, 6)); - // Note the common directory is /clang-test/ - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})), - std::pow(ProximityBase, 7)); -} - -TEST(QualityTests, URIProximityScoresWithTestURI) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"b", "c", "x"})}); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"), - std::pow(ProximityBase, 2)); - // unittest:///b/c/x vs unittest:///m/n/y. No common directory. - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "y"}), "unittest"), - std::pow(ProximityBase, 4)); -} - } // namespace } // namespace clangd } // namespace clang Index: unittests/clangd/HeadersTests.cpp === --- unittests/clangd/HeadersTests.cpp +++ unittests/clangd/HeadersTests.cpp @@ -64,18 +64,17 @@ } protected: - std::vector collectIncludes() { + IncludeStructure collectIncludes() { auto Clang = setupClang(); PreprocessOnlyAction Action; EXPECT_TRUE( Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])); -std::vector
[PATCH] D48441: [clangd] Incorporate transitive #includes into code complete proximity scoring.
sammccall created this revision. sammccall added a reviewer: ioeric. Herald added subscribers: cfe-commits, jkorous, MaskRay, ilya-biryukov, mgorny. We now compute a distance from the main file to the symbol header, which is a weighted count of: - some number of #include traversals from source file --> included file - some number of FS traversals from file --> parent directory - some number of FS traversals from parent directory --> child file/dir This calculation is performed in the appropriate URI scheme. This means we'll get some proximity boost from header files in main-file contexts, even when these are in different directory trees. This extended file proximity model is not yet incorporated in the index interface/implementation. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D48441 Files: clangd/CMakeLists.txt clangd/ClangdServer.cpp clangd/ClangdUnit.cpp clangd/ClangdUnit.h clangd/CodeComplete.cpp clangd/CodeComplete.h clangd/FileDistance.cpp clangd/FileDistance.h clangd/Headers.cpp clangd/Headers.h clangd/Quality.cpp clangd/Quality.h clangd/XRefs.cpp unittests/clangd/CMakeLists.txt unittests/clangd/FileDistanceTests.cpp unittests/clangd/HeadersTests.cpp unittests/clangd/QualityTests.cpp Index: unittests/clangd/QualityTests.cpp === --- unittests/clangd/QualityTests.cpp +++ unittests/clangd/QualityTests.cpp @@ -18,6 +18,7 @@ //===--===// #include "Quality.h" +#include "FileDistance.h" #include "TestFS.h" #include "TestTU.h" #include "gmock/gmock.h" @@ -157,9 +158,21 @@ PoorNameMatch.NameMatch = 0.2f; EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); - SymbolRelevanceSignals WithProximity; - WithProximity.SemaProximityScore = 0.2f; - EXPECT_GT(WithProximity.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithSemaProximity; + WithSemaProximity.SemaProximityScore = 0.2f; + EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate()); + + SymbolRelevanceSignals IndexProximate; + IndexProximate.SymbolURI = "unittest:/foo/bar.h"; + llvm::StringMap ProxRoots; + ProxRoots[testPath("foo/baz.h")] = 0; + URIDistance Distance(ProxRoots); + IndexProximate.FileProximityMatch = + EXPECT_GT(IndexProximate.evaluate(), Default.evaluate()); + SymbolRelevanceSignals IndexDistant = IndexProximate; + IndexDistant.SymbolURI = "unittest:/elsewhere/path.h"; + EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate()) << IndexProximate << IndexDistant; + EXPECT_GT(IndexDistant.evaluate(), Default.evaluate()); SymbolRelevanceSignals Scoped; Scoped.Scope = SymbolRelevanceSignals::FileScope; @@ -180,59 +193,6 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } -// {a,b,c} becomes /clangd-test/a/b/c -std::string joinPaths(llvm::ArrayRef Parts) { - return testPath( - llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); -} - -static constexpr float ProximityBase = 0.7; - -// Calculates a proximity score for an index symbol with declaration file -// SymPath with the given URI scheme. -float URIProximity(const FileProximityMatcher , StringRef SymPath, - StringRef Scheme = "file") { - auto U = URI::create(SymPath, Scheme); - EXPECT_TRUE(static_cast(U)) << llvm::toString(U.takeError()); - return Matcher.uriProximity(U->toString()); -} - -TEST(QualityTests, URIProximityScores) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})}); - - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})), - ProximityBase); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})), - std::pow(ProximityBase, 2)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})), - std::pow(ProximityBase, 6)); - // Note the common directory is /clang-test/ - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})), - std::pow(ProximityBase, 7)); -} - -TEST(QualityTests, URIProximityScoresWithTestURI) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"b", "c", "x"})}); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"), - std::pow(ProximityBase, 2)); - // unittest:///b/c/x vs unittest:///m/n/y. No common directory. -