[PATCH] D51321: [Tooling] Improve handling of CL-style options
hamzasood added a comment. Thanks for the help with this. I reverted most of my changes (including the parameterised tests) before committing; I think I implemented all of your suggestions. Repository: rL LLVM https://reviews.llvm.org/D51321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D51321: [Tooling] Improve handling of CL-style options
This revision was automatically updated to reflect the committed changes. Closed by commit rL341760: [Tooling] Improve handling of CL-style options (authored by hamzasood, committed by ). Herald added a subscriber: llvm-commits. Changed prior to commit: https://reviews.llvm.org/D51321?vs=163098&id=164585#toc Repository: rL LLVM https://reviews.llvm.org/D51321 Files: cfe/trunk/lib/Tooling/InterpolatingCompilationDatabase.cpp cfe/trunk/unittests/Tooling/CompilationDatabaseTest.cpp Index: cfe/trunk/unittests/Tooling/CompilationDatabaseTest.cpp === --- cfe/trunk/unittests/Tooling/CompilationDatabaseTest.cpp +++ cfe/trunk/unittests/Tooling/CompilationDatabaseTest.cpp @@ -648,14 +648,17 @@ protected: // Adds an entry to the underlying compilation database. // A flag is injected: -D , so the command used can be identified. - void add(llvm::StringRef File, llvm::StringRef Flags = "") { -llvm::SmallVector Argv = {"clang", File, "-D", File}; + void add(StringRef File, StringRef Clang, StringRef Flags) { +SmallVector Argv = {Clang, File, "-D", File}; llvm::SplitString(Flags, Argv); -llvm::SmallString<32> Dir; + +SmallString<32> Dir; llvm::sys::path::system_temp_directory(false, Dir); + Entries[path(File)].push_back( {Dir, path(File), {Argv.begin(), Argv.end()}, "foo.o"}); } + void add(StringRef File, StringRef Flags = "") { add(File, "clang", Flags); } // Turn a unix path fragment (foo/bar.h) into a native path (C:\tmp\foo\bar.h) std::string path(llvm::SmallString<32> File) { @@ -739,6 +742,30 @@ EXPECT_EQ(getCommand("foo/bar/baz/shout.C"), "clang -D FOO/BAR/BAZ/SHOUT.cc"); } +TEST_F(InterpolateTest, Aliasing) { + add("foo.cpp", "-faligned-new"); + + // The interpolated command should keep the given flag as written, even though + // the flag is internally represented as an alias. + EXPECT_EQ(getCommand("foo.hpp"), "clang -D foo.cpp -faligned-new"); +} + +TEST_F(InterpolateTest, ClangCL) { + add("foo.cpp", "clang-cl", "/W4"); + + // Language flags should be added with CL syntax. + EXPECT_EQ(getCommand("foo.h"), "clang-cl -D foo.cpp /W4 /TP"); +} + +TEST_F(InterpolateTest, DriverModes) { + add("foo.cpp", "clang-cl", "--driver-mode=gcc"); + add("bar.cpp", "clang", "--driver-mode=cl"); + + // --driver-mode overrides should be respected. + EXPECT_EQ(getCommand("foo.h"), "clang-cl -D foo.cpp --driver-mode=gcc -x c++-header"); + EXPECT_EQ(getCommand("bar.h"), "clang -D bar.cpp --driver-mode=cl /TP"); +} + TEST(CompileCommandTest, EqualityOperator) { CompileCommand CCRef("/foo/bar", "hello.c", {"a", "b"}, "hello.o"); CompileCommand CCTest = CCRef; Index: cfe/trunk/lib/Tooling/InterpolatingCompilationDatabase.cpp === --- cfe/trunk/lib/Tooling/InterpolatingCompilationDatabase.cpp +++ cfe/trunk/lib/Tooling/InterpolatingCompilationDatabase.cpp @@ -48,6 +48,7 @@ #include "clang/Frontend/LangStandard.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" @@ -127,47 +128,68 @@ Optional Type; // Standard specified by -std. LangStandard::Kind Std = LangStandard::lang_unspecified; + // Whether the command line is for the cl-compatible driver. + bool ClangCLMode; TransferableCommand(CompileCommand C) - : Cmd(std::move(C)), Type(guessType(Cmd.Filename)) { -std::vector NewArgs = {Cmd.CommandLine.front()}; + : Cmd(std::move(C)), Type(guessType(Cmd.Filename)), +ClangCLMode(checkIsCLMode(Cmd.CommandLine)) { +std::vector OldArgs = std::move(Cmd.CommandLine); +Cmd.CommandLine.clear(); + +// Wrap the old arguments in an InputArgList. +llvm::opt::InputArgList ArgList; +{ + SmallVector TmpArgv; + for (const std::string &S : OldArgs) +TmpArgv.push_back(S.c_str()); + ArgList = {TmpArgv.begin(), TmpArgv.end()}; +} + // Parse the old args in order to strip out and record unwanted flags. +// We parse each argument individually so that we can retain the exact +// spelling of each argument; re-rendering is lossy for aliased flags. +// E.g. in CL mode, /W4 maps to -Wall. auto OptTable = clang::driver::createDriverOptTable(); -std::vector Argv; -for (unsigned I = 1; I < Cmd.CommandLine.size(); ++I) - Argv.push_back(Cmd.CommandLine[I].c_str()); -unsigned MissingI, MissingC; -auto ArgList = OptTable->ParseArgs(Argv, MissingI, MissingC); -for (const auto *Arg : ArgList) { - const auto &option = Arg->getOption(); +Cmd.CommandLine.emplace_back(OldArgs.front()); +for (unsigned Pos = 1; Pos < OldArgs.size();) { + using namespace driver::options; + + const unsigned OldPos = Pos; + std::unique_ptr Arg(OptTable->ParseOn
[PATCH] D51321: [Tooling] Improve handling of CL-style options
sammccall accepted this revision. sammccall added a comment. This revision is now accepted and ready to land. Thanks again for fixing this. Still a few places I feel the code could be simpler, but will let you decide how to deal with them. I would greatly appreciate removing the parameterized tests, as that's the place where I feel least confident I can understand exactly what the intended behavior is. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:141 +{ + SmallVector TmpArgv; + TmpArgv.reserve(OldArgs.size()); please remove premature optimizations (SmallVector, reserve) - this function is not (any more) performance-critical Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:143 + TmpArgv.reserve(OldArgs.size()); + llvm::transform(OldArgs, std::back_inserter(TmpArgv), + [](const std::string &S) { return S.c_str(); }); simple loop is more readable than transform() Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:192 +const auto DriverModeName = + OptTable.getOption(driver::options::OPT_driver_mode).getPrefixedName(); + can we just inline this ("--driver-mode") like we do with other specific we need in their string form (std etc)? Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:196 +for (StringRef S : llvm::reverse(CmdLine)) { + if (S.startswith(DriverModeName)) +return S.drop_front(DriverModeName.length()) == "cl"; ``` if (S.consume_front(...)) return S == "cl"; ``` Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:124 +// Determine whether the given file path is the clang-cl executable. +static bool checkIsCLMode(const std::vector &CmdLine, + const llvm::opt::OptTable &OptTable) { hamzasood wrote: > hamzasood wrote: > > sammccall wrote: > > > nit: a two-value enum would be clearer and allow for terser variable > > > names (`Mode`) > > The advantage of a Boolean is that it makes the checks simpler. E.g. `if > > (isCL)` instead of `if (mode == DriverMode::CL)` or something. > > > > Happy to change it though if you still disagree. > Also, there're more than just 2 driver modes. But we only care about cl and > not cl. I do find `if (mode == DriverMode::CL)` much clearer. "CL" isn't a particularly clear name for people who don't deal with windows much, and "!isCL" isn't a great name for the GCC-compatible driver. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:156 + +// Transform the command line to an llvm ArgList. +// Make sure that OldArgs lives for at least as long as this variable as the hamzasood wrote: > hamzasood wrote: > > sammccall wrote: > > > would you mind reverting this change and just wrapping the old Argv in an > > > InputArgList? > > > I'm not sure the lifetime comments and std::transform aid readability > > > here. > > The change was more about safety than readability. The old approach left an > > InputArgList of dangling pointers after moving the new args into the cmd > > object. This way there’s no way to accidentally access deallocated memory > > by using the InputArgList. > > > > As above, happy to change if you still disagree. > I've restructured it so hopefully this is less of a concern. I think a comment `// ArgList is no longer valid` would suffice, or storing the ArgList in an `Optional` and resetting it. In principle the restructuring seems fine, but it introduced new issues: the boundaries and data flow between the constructor and `processInputCommandLine` is unclear. (It reads from parameters which are derived from `Cmd.CommandLine` and overwrites `Cmd.CommandLine` itself, along with other state. Its name doesn't help clarify what its responsibility is. If you want to keep this function separate, I'd suggest: - make it static to avoid confusion about state while the constructor is running - call it `parseCommandLine` to reflect the processing it's doing - return `tuple, DriverMode, LangStandard, Optional>` - call it as `std::tie(Cmd.CommandLine, Mode, ...) = parseCommandLine(...)` But it also seems fine as part of the constructor. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:231 : Type; - Result.CommandLine.push_back("-x"); - Result.CommandLine.push_back(types::getTypeName(TargetType)); + if (IsCLMode) { +const StringRef Flag = toCLFlag(TargetType); hamzasood wrote: > sammccall wrote: > > can we unify as `addTypeFlag(TargetType, Mode, Result.CommandLine)`? > To clarify, you mean extract this into a helper function? Right - you already have the helper function `toCLflag`, I'd suggest extending/renaming it so it covers both CL and GCC and fits the call site more co
[PATCH] D51321: [Tooling] Improve handling of CL-style options
hamzasood marked 10 inline comments as done. hamzasood added inline comments. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:124 +// Determine whether the given file path is the clang-cl executable. +static bool checkIsCLMode(const std::vector &CmdLine, + const llvm::opt::OptTable &OptTable) { hamzasood wrote: > sammccall wrote: > > nit: a two-value enum would be clearer and allow for terser variable names > > (`Mode`) > The advantage of a Boolean is that it makes the checks simpler. E.g. `if > (isCL)` instead of `if (mode == DriverMode::CL)` or something. > > Happy to change it though if you still disagree. Also, there're more than just 2 driver modes. But we only care about cl and not cl. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:156 + +// Transform the command line to an llvm ArgList. +// Make sure that OldArgs lives for at least as long as this variable as the hamzasood wrote: > sammccall wrote: > > would you mind reverting this change and just wrapping the old Argv in an > > InputArgList? > > I'm not sure the lifetime comments and std::transform aid readability here. > The change was more about safety than readability. The old approach left an > InputArgList of dangling pointers after moving the new args into the cmd > object. This way there’s no way to accidentally access deallocated memory by > using the InputArgList. > > As above, happy to change if you still disagree. I've restructured it so hopefully this is less of a concern. Comment at: unittests/Tooling/CompilationDatabaseTest.cpp:652 + +class InterpolateTest + : public ::testing::TestWithParam> { hamzasood wrote: > sammccall wrote: > > I'm not sure the parameterized test is the right model here. > > The `INSTANTIATE_TEST_P` and friends makes the test fixture pretty > > complicated, and failure messages hard to relate to input conditions. > > Then the actual tests have a bunch of assertions conditional on which mode > > we're in. > > Finally, we don't actually test any mixed-mode database. > > > > Could we write this a little more directly?: > > - pass the driver mode flag to `add()` in the normal way, in the Flags > > param > > - require callers to "add" to pass "clang" or "clang-cl". (It's OK that > > this makes existing cases a bit more verbose) > > - explicitly test the clang-cl and --driver-mode cases we care about, > > rather than running every assertion in every configuration > > > > e.g. > > ``` > > TEST_F(InterpolateTest, ClangCL) { > > add("foo.cc", "clang"); > > add("bar.cc", "clang-cl"); > > add("baz.cc", "clang --driver-mode=clang-cl"); > > > > EXPECT_EQ(getCommand("a/bar.h"), "clang-cl -D a/baz.cc /TP"); > > } > > ``` > The intent was to make sure that the existing cases (that shouldn’t depend on > the driver mode) don’t break, which admittedly happened while I was working > on the patch. > > Most of the tests aren’t conditional on the mode, and I think it’s important > that they’re tested in all configurations. The parameterisation also lets us > test as `clang-cl`, `clang-cl —driver-mode=cl`, and `clang —driver-mode=cl`. > Which are all valid ways of using CL mode. To clarify: the parameterisation runs the test 6 times. The `isTestingCL()` check narrows that down to 3. https://reviews.llvm.org/D51321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D51321: [Tooling] Improve handling of CL-style options
hamzasood updated this revision to Diff 163098. hamzasood added a comment. I've addressed most of your comments and resolved the merge conflicts introduced in r340838. https://reviews.llvm.org/D51321 Files: lib/Tooling/InterpolatingCompilationDatabase.cpp unittests/Tooling/CompilationDatabaseTest.cpp Index: unittests/Tooling/CompilationDatabaseTest.cpp === --- unittests/Tooling/CompilationDatabaseTest.cpp +++ unittests/Tooling/CompilationDatabaseTest.cpp @@ -14,6 +14,8 @@ #include "clang/Tooling/JSONCompilationDatabase.h" #include "clang/Tooling/Tooling.h" #include "llvm/Support/Path.h" + +#define GTEST_HAS_COMBINE 1 #include "gtest/gtest.h" namespace clang { @@ -644,15 +646,53 @@ } }; -class InterpolateTest : public ::testing::Test { +enum class DriverExe { Clang, ClangCL }; +enum class DriverMode { Implicit, GCC, CL }; + +class InterpolateTest +: public ::testing::TestWithParam> { + + StringRef getClangExe() const { +switch (std::get<0>(GetParam())) { +case DriverExe::Clang: return "clang"; +case DriverExe::ClangCL: return "clang-cl"; +} +llvm_unreachable("Invalid DriverExe"); + } + + StringRef getDriverModeFlag() const { +switch (std::get<1>(GetParam())) { +case DriverMode::Implicit: return StringRef(); +case DriverMode::GCC: return "--driver-mode=gcc"; +case DriverMode::CL: return "--driver-mode=cl"; +} +llvm_unreachable("Invalid DriverMode"); + } + protected: + bool isTestingCL() const { +switch (std::get<1>(GetParam())) { +case DriverMode::Implicit: + return std::get<0>(GetParam()) == DriverExe::ClangCL; +case DriverMode::GCC: return false; +case DriverMode::CL: return true; +} +llvm_unreachable("Invalid DriverMode"); + } + // Adds an entry to the underlying compilation database. // A flag is injected: -D , so the command used can be identified. void add(llvm::StringRef File, llvm::StringRef Flags = "") { -llvm::SmallVector Argv = {"clang", File, "-D", File}; +llvm::SmallVector Argv; +Argv.push_back(getClangExe()); +if (!getDriverModeFlag().empty()) + Argv.push_back(getDriverModeFlag()); +Argv.append({"-D", File}); llvm::SplitString(Flags, Argv); + llvm::SmallString<32> Dir; llvm::sys::path::system_temp_directory(false, Dir); + Entries[path(File)].push_back( {Dir, path(File), {Argv.begin(), Argv.end()}, "foo.o"}); } @@ -675,69 +715,124 @@ ->getCompileCommands(path(F)); if (Results.empty()) return "none"; -// drop the input file argument, so tests don't have to deal with path(). -EXPECT_EQ(Results[0].CommandLine.back(), path(F)) -<< "Last arg should be the file"; -Results[0].CommandLine.pop_back(); -return llvm::join(Results[0].CommandLine, " "); + +// To simply the test checks, we're building a new command line that doesn't +// contain the uninteresting stuff (exe name etc.). +ArrayRef CmdLine = Results[0].CommandLine; + +// Drop the clang executable path. +EXPECT_EQ(CmdLine.front(), getClangExe()) << "First arg should be clang"; +CmdLine = CmdLine.drop_front(); + +// Drop the --driver-mode flag. +if (!getDriverModeFlag().empty()) { + EXPECT_EQ(CmdLine.front(), getDriverModeFlag()) +<< "Incorrect driver mode flag"; + CmdLine = CmdLine.drop_front(); +} + +// Drop the input file. +EXPECT_EQ(CmdLine.back(), path(F)) << "Last arg should be the file"; +CmdLine = CmdLine.drop_back(); + +return llvm::join(CmdLine, " "); } MemCDB::EntryMap Entries; }; -TEST_F(InterpolateTest, Nearby) { +TEST_P(InterpolateTest, Nearby) { add("dir/foo.cpp"); add("dir/bar.cpp"); add("an/other/foo.cpp"); // great: dir and name both match (prefix or full, case insensitive) - EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp"); - EXPECT_EQ(getCommand("dir/FOO.cpp"), "clang -D dir/foo.cpp"); + EXPECT_EQ(getCommand("dir/f.cpp"), "-D dir/foo.cpp"); + EXPECT_EQ(getCommand("dir/FOO.cpp"), "-D dir/foo.cpp"); // no name match. prefer matching dir, break ties by alpha - EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp"); + EXPECT_EQ(getCommand("dir/a.cpp"), "-D dir/bar.cpp"); // an exact name match beats one segment of directory match EXPECT_EQ(getCommand("some/other/bar.h"), -"clang -D dir/bar.cpp -x c++-header"); +isTestingCL() +? "-D dir/bar.cpp /TP" +: "-D dir/bar.cpp -x c++-header"); // two segments of directory match beat a prefix name match - EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp"); + EXPECT_EQ(getCommand("an/other/b.cpp"), "-D an/other/foo.cpp"); // if nothing matches at all, we still get the closest alpha match EXPECT_EQ(getCommand("below/some/obscure/path.cpp"), -"clang -D an/other/foo.cpp"); +"-D an/oth
[PATCH] D51321: [Tooling] Improve handling of CL-style options
hamzasood added a comment. Thanks for the detailed feedback; I’ll try to get a new patch up in a few hours. However I disagree with some of them (see replies). Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:124 +// Determine whether the given file path is the clang-cl executable. +static bool checkIsCLMode(const std::vector &CmdLine, + const llvm::opt::OptTable &OptTable) { sammccall wrote: > nit: a two-value enum would be clearer and allow for terser variable names > (`Mode`) The advantage of a Boolean is that it makes the checks simpler. E.g. `if (isCL)` instead of `if (mode == DriverMode::CL)` or something. Happy to change it though if you still disagree. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:156 + +// Transform the command line to an llvm ArgList. +// Make sure that OldArgs lives for at least as long as this variable as the sammccall wrote: > would you mind reverting this change and just wrapping the old Argv in an > InputArgList? > I'm not sure the lifetime comments and std::transform aid readability here. The change was more about safety than readability. The old approach left an InputArgList of dangling pointers after moving the new args into the cmd object. This way there’s no way to accidentally access deallocated memory by using the InputArgList. As above, happy to change if you still disagree. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:178 + { +unsigned Included, Excluded; +if (IsCLMode) { sammccall wrote: > This seems a bit verbose. > > First, do things actually break if we just use the default 0/0 masks? We're > not trying to interpret all the flags, just a few and pass the rest through. > > Failing that, it seems clearer to just use a ternary to initialize > Included/Excluded, or inline them completely. > (Please also drop the extra scope here). Theoretically it could break without the flags. We need to recognise input files and strip them from the command line. If someone on a UNIX platform has an input file called `/W3`, that’d instead be interpreted as a warning flag and it’ll be left in. Likewise for any file in the root directory with the same spelling as a CL-only argument. But yeah, I’ll inline the flags with a ternary. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:205 + if (const auto GivenStd = isStdArg(*Arg)) { +if (*GivenStd != LangStandard::lang_unspecified) + Std = *GivenStd; sammccall wrote: > prefer *either* optional<> or allowing the function to return > lang_unspecified, but not both. (There's no way a user can *explicitly* pass > a flag saying the language is unspecified, right?) Kind of: `-std=hello_there` is parsed as an unspecified language. We still want to strip the flag in this case, which won’t be possible if we also use the unspecified value to denote a lack of an `-std` flag. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:231 : Type; - Result.CommandLine.push_back("-x"); - Result.CommandLine.push_back(types::getTypeName(TargetType)); + if (IsCLMode) { +const StringRef Flag = toCLFlag(TargetType); sammccall wrote: > can we unify as `addTypeFlag(TargetType, Mode, Result.CommandLine)`? To clarify, you mean extract this into a helper function? Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:258 + +if (IsCLMode) + return Opt.matches(driver::options::OPT__SLASH_Fa) || sammccall wrote: > Why do we need to take IsClMode into account while reading flags? > How would `OPT__SLASH_Fa` get parsed if we weren't in CL mode? Would we care? > > (and below) It was an optimisation attempt in that we can do 5 less comparisons in the case where we know that there aren’t going to be any CL flags. Maybe I was trying too hard to micro-optimise... Comment at: unittests/Tooling/CompilationDatabaseTest.cpp:652 + +class InterpolateTest + : public ::testing::TestWithParam> { sammccall wrote: > I'm not sure the parameterized test is the right model here. > The `INSTANTIATE_TEST_P` and friends makes the test fixture pretty > complicated, and failure messages hard to relate to input conditions. > Then the actual tests have a bunch of assertions conditional on which mode > we're in. > Finally, we don't actually test any mixed-mode database. > > Could we write this a little more directly?: > - pass the driver mode flag to `add()` in the normal way, in the Flags param > - require callers to "add" to pass "clang" or "clang-cl". (It's OK that this > makes existing cases a bit more verbose) > - explicitly test the clang-cl and --driver-mode cases we care about, rather > than running every assert
[PATCH] D51321: [Tooling] Improve handling of CL-style options
sammccall added a comment. Thanks for fixing this! Mostly just picky style comments. In particular, I know that some of the other maintainers here are just as ignorant of the cl driver as I am, and I want to make sure that it's still possible to follow the logic and debug unrelated problems without needing to know too much about it. If some of the style bits is too annoying or unclear, happy to do some of it myself as a followup, let me know! Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:156-169 +// Transform the command line to an llvm ArgList. +// Make sure that OldArgs lives for at least as long as this variable as the +// arg list contains pointers to the OldArgs strings. +llvm::opt::InputArgList ArgList; +{ + // Unfortunately InputArgList requires an array of c-strings whereas we + // have an array of std::string; we'll need an intermediate vector. hamzasood wrote: > rnk wrote: > > I think the old for loop was nicer. I don't think this code needs to > > change, you should be able to use ParseArgs with the extra flag args. > The problem with the old loop is that we lose aliasing information (e.g. > `/W3` becomes `-Wall`). While `ParseOneArg` also performs alias conversion, > it gives us indices for each individual argument. The last line of the new > loop copies arguments by using that index information to read from `OldArgs`, > which gives us the original spelling. Makes sense, please add a comment summarizing. ("We don't use ParseArgs, as we must pass through the exact spelling of uninteresting args. Re-rendering is lossy for clang-cl options, e.g. /W3 -> -Wall") Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:124 +// Determine whether the given file path is the clang-cl executable. +static bool checkIsCLMode(const std::vector &CmdLine, + const llvm::opt::OptTable &OptTable) { nit: a two-value enum would be clearer and allow for terser variable names (`Mode`) Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:156 + +// Transform the command line to an llvm ArgList. +// Make sure that OldArgs lives for at least as long as this variable as the would you mind reverting this change and just wrapping the old Argv in an InputArgList? I'm not sure the lifetime comments and std::transform aid readability here. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:175 +Cmd.CommandLine.emplace_back(OldArgs.front()); +for (unsigned Start = 1, End = Start; End < OldArgs.size(); Start = End) { + std::unique_ptr Arg; these names are somewhat confusing - "End" neither marks the end of the loop nor the end of the current item (as it's initialized to Start). What about: ``` for (unsigned Pos = 1; Pos < OldArgs.size();) { ... unsigned OldPos = Pos; Arg = ParseOneArg(..., Pos); ... NewArgs.insert(NewArgs.end(), &OldArgs[OldPos], &OldArgs[Pos]); } ``` Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:178 + { +unsigned Included, Excluded; +if (IsCLMode) { This seems a bit verbose. First, do things actually break if we just use the default 0/0 masks? We're not trying to interpret all the flags, just a few and pass the rest through. Failing that, it seems clearer to just use a ternary to initialize Included/Excluded, or inline them completely. (Please also drop the extra scope here). Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:186 +} +Arg = std::unique_ptr( + OptTable->ParseOneArg(ArgList, End, Included, Excluded)); Arg.reset(OptTable->ParseOneArg...) Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:194 // Strip input and output files. - if (option.matches(clang::driver::options::OPT_INPUT) || - option.matches(clang::driver::options::OPT_o)) { + if (isUntypedInputOrOutput(*Arg)) continue; can we inline this and just `||` all the options together here? Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:197 + + // Strip type specifiers, but record the overridden language. + if (const auto GivenType = isTypeSpecArg(*Arg)) { please keep the mentions of -x etc even if not comprehensive - it's hard to precisely+tersely describe these flags in prose. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:203 + + // Strip std, but record the value. + if (const auto GivenStd = isStdArg(*Arg)) { ditto --std Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:205 + if (const auto GivenStd = isStdArg(*Arg)) { +if (*GivenStd != LangStandard::lang_unspecified) +
[PATCH] D51321: [Tooling] Improve handling of CL-style options
hamzasood updated this revision to Diff 162872. hamzasood added a comment. - Re-uploaded with full context. Yeah, I encountered these issues while using clangd on Windows. I haven't run into any clangd issues after applying this patch, but admittedly my usage is very basic (pretty much just code completion). It may well be subtly broken in other places. https://reviews.llvm.org/D51321 Files: lib/Tooling/InterpolatingCompilationDatabase.cpp unittests/Tooling/CompilationDatabaseTest.cpp Index: unittests/Tooling/CompilationDatabaseTest.cpp === --- unittests/Tooling/CompilationDatabaseTest.cpp +++ unittests/Tooling/CompilationDatabaseTest.cpp @@ -14,6 +14,8 @@ #include "clang/Tooling/JSONCompilationDatabase.h" #include "clang/Tooling/Tooling.h" #include "llvm/Support/Path.h" + +#define GTEST_HAS_COMBINE 1 #include "gtest/gtest.h" namespace clang { @@ -644,15 +646,50 @@ } }; -class InterpolateTest : public ::testing::Test { +enum class DriverExe { Clang, ClangCL }; +enum class DriverMode { None, GCC, CL }; + +class InterpolateTest + : public ::testing::TestWithParam> { + protected: + bool isTestingCL() const { +switch (std::get<1>(GetParam())) { +case DriverMode::None: return std::get<0>(GetParam()) == DriverExe::ClangCL; +case DriverMode::GCC: return false; +case DriverMode::CL: return true; +default: llvm_unreachable("Invalid DriverMode"); +} + } + StringRef getClangExe() const { +switch (std::get<0>(GetParam())) { +case DriverExe::Clang: return "clang"; +case DriverExe::ClangCL: return "clang-cl"; +default: llvm_unreachable("Invalid DriverExe"); +} + } + StringRef getDriverModeArg() const { +switch (std::get<1>(GetParam())) { +case DriverMode::None: return StringRef(); +case DriverMode::GCC: return "--driver-mode=gcc"; +case DriverMode::CL: return "--driver-mode=cl"; +default: llvm_unreachable("Invalid DriverMode"); +} + } + // Adds an entry to the underlying compilation database. // A flag is injected: -D , so the command used can be identified. void add(llvm::StringRef File, llvm::StringRef Flags = "") { -llvm::SmallVector Argv = {"clang", File, "-D", File}; +llvm::SmallVector Argv; +Argv.push_back(getClangExe()); +if (!getDriverModeArg().empty()) + Argv.push_back(getDriverModeArg()); +Argv.append({"-D", File}); llvm::SplitString(Flags, Argv); + llvm::SmallString<32> Dir; llvm::sys::path::system_temp_directory(false, Dir); + Entries[path(File)].push_back( {Dir, path(File), {Argv.begin(), Argv.end()}, "foo.o"}); } @@ -675,67 +712,101 @@ ->getCompileCommands(path(F)); if (Results.empty()) return "none"; -// drop the input file argument, so tests don't have to deal with path(). -EXPECT_EQ(Results[0].CommandLine.back(), path(F)) -<< "Last arg should be the file"; -Results[0].CommandLine.pop_back(); -return llvm::join(Results[0].CommandLine, " "); + +ArrayRef CmdLine = Results[0].CommandLine; + +// Drop the clang executable path. +EXPECT_EQ(CmdLine.front(), getClangExe()) << "First arg should be clang"; +CmdLine = CmdLine.drop_front(); + +// Drop the --driver-mode flag. +if (!getDriverModeArg().empty()) { + EXPECT_EQ(CmdLine.front(), getDriverModeArg()) + << "Expected driver mode arg"; + CmdLine = CmdLine.drop_front(); +} + +// Drop the input file. +EXPECT_EQ(CmdLine.back(), path(F)) << "Last arg should be the file"; +CmdLine = CmdLine.drop_back(); + +return llvm::join(CmdLine, " "); } MemCDB::EntryMap Entries; }; -TEST_F(InterpolateTest, Nearby) { +TEST_P(InterpolateTest, Nearby) { add("dir/foo.cpp"); add("dir/bar.cpp"); add("an/other/foo.cpp"); // great: dir and name both match (prefix or full, case insensitive) - EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp"); - EXPECT_EQ(getCommand("dir/FOO.cpp"), "clang -D dir/foo.cpp"); + EXPECT_EQ(getCommand("dir/f.cpp"), "-D dir/foo.cpp"); + EXPECT_EQ(getCommand("dir/FOO.cpp"), "-D dir/foo.cpp"); // no name match. prefer matching dir, break ties by alpha - EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp"); + EXPECT_EQ(getCommand("dir/a.cpp"), "-D dir/bar.cpp"); // an exact name match beats one segment of directory match EXPECT_EQ(getCommand("some/other/bar.h"), -"clang -D dir/bar.cpp -x c++-header"); +isTestingCL() +? "-D dir/bar.cpp /TP" +: "-D dir/bar.cpp -x c++-header"); // two segments of directory match beat a prefix name match - EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp"); + EXPECT_EQ(getCommand("an/other/b.cpp"), "-D an/other/foo.cpp"); // if nothing matches at all, we still get the closest alpha match EXPECT_EQ(getCommand("below/some/obscure/path.cpp"), -
[PATCH] D51321: [Tooling] Improve handling of CL-style options
sammccall added a comment. Hi, sorry about overlooking cl mode flags when adding this, I was blissfully unaware that `compile_commands.json` could use that syntax :-) Out of curiosity, are you using this with clangd or some other tool? I'm sure there are places where clangd injects unixy flags... Will take a look and try to understand. Be aware of https://reviews.llvm.org/D51314 which is fixing some nasty performance pitfalls of InterpolatingCompilationDatabase (it should be logic neutral, but moves around the parsing code). Could you reupload the patch with context? https://reviews.llvm.org/D51321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D51321: [Tooling] Improve handling of CL-style options
hamzasood marked an inline comment as done. hamzasood added inline comments. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:136 + // Otherwise just check the clang file name. + return llvm::sys::path::stem(CmdLine.front()).endswith_lower("clang-cl"); +} rnk wrote: > We support being called "CL.exe", but with our new VS integration, I don't > think it matters to check for this case anymore. We may remove it over time. Should I add the check anyway? Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:140 -unsigned MissingI, MissingC; -auto ArgList = OptTable->ParseArgs(Argv, MissingI, MissingC); -for (const auto *Arg : ArgList) { rnk wrote: > Can you not keep using ParseArgs? It takes FlagsToInclude and FlagsToExclude, > which you can compute outside the loop now. Good point, I forgot to move the flags out of the loop when moving the —driver-mode check. But I don’t think ParseArgs is suitable (see the other comment response). Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:156-169 +// Transform the command line to an llvm ArgList. +// Make sure that OldArgs lives for at least as long as this variable as the +// arg list contains pointers to the OldArgs strings. +llvm::opt::InputArgList ArgList; +{ + // Unfortunately InputArgList requires an array of c-strings whereas we + // have an array of std::string; we'll need an intermediate vector. rnk wrote: > I think the old for loop was nicer. I don't think this code needs to change, > you should be able to use ParseArgs with the extra flag args. The problem with the old loop is that we lose aliasing information (e.g. `/W3` becomes `-Wall`). While `ParseOneArg` also performs alias conversion, it gives us indices for each individual argument. The last line of the new loop copies arguments by using that index information to read from `OldArgs`, which gives us the original spelling. https://reviews.llvm.org/D51321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D51321: [Tooling] Improve handling of CL-style options
rnk added inline comments. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:136 + // Otherwise just check the clang file name. + return llvm::sys::path::stem(CmdLine.front()).endswith_lower("clang-cl"); +} We support being called "CL.exe", but with our new VS integration, I don't think it matters to check for this case anymore. We may remove it over time. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:140 -unsigned MissingI, MissingC; -auto ArgList = OptTable->ParseArgs(Argv, MissingI, MissingC); -for (const auto *Arg : ArgList) { Can you not keep using ParseArgs? It takes FlagsToInclude and FlagsToExclude, which you can compute outside the loop now. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:156-169 +// Transform the command line to an llvm ArgList. +// Make sure that OldArgs lives for at least as long as this variable as the +// arg list contains pointers to the OldArgs strings. +llvm::opt::InputArgList ArgList; +{ + // Unfortunately InputArgList requires an array of c-strings whereas we + // have an array of std::string; we'll need an intermediate vector. I think the old for loop was nicer. I don't think this code needs to change, you should be able to use ParseArgs with the extra flag args. https://reviews.llvm.org/D51321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D51321: [Tooling] Improve handling of CL-style options
hamzasood updated this revision to Diff 162762. hamzasood added a comment. Thanks, I've changed it so that the driver mode is calculated first. I've also extended the unit test to include the various `--driver-mode` combinations. https://reviews.llvm.org/D51321 Files: lib/Tooling/InterpolatingCompilationDatabase.cpp unittests/Tooling/CompilationDatabaseTest.cpp Index: unittests/Tooling/CompilationDatabaseTest.cpp === --- unittests/Tooling/CompilationDatabaseTest.cpp +++ unittests/Tooling/CompilationDatabaseTest.cpp @@ -14,6 +14,8 @@ #include "clang/Tooling/JSONCompilationDatabase.h" #include "clang/Tooling/Tooling.h" #include "llvm/Support/Path.h" + +#define GTEST_HAS_COMBINE 1 #include "gtest/gtest.h" namespace clang { @@ -644,15 +646,50 @@ } }; -class InterpolateTest : public ::testing::Test { +enum class DriverExe { Clang, ClangCL }; +enum class DriverMode { None, GCC, CL }; + +class InterpolateTest + : public ::testing::TestWithParam> { + protected: + bool isTestingCL() const { +switch (std::get<1>(GetParam())) { +case DriverMode::None: return std::get<0>(GetParam()) == DriverExe::ClangCL; +case DriverMode::GCC: return false; +case DriverMode::CL: return true; +default: llvm_unreachable("Invalid DriverMode"); +} + } + StringRef getClangExe() const { +switch (std::get<0>(GetParam())) { +case DriverExe::Clang: return "clang"; +case DriverExe::ClangCL: return "clang-cl"; +default: llvm_unreachable("Invalid DriverExe"); +} + } + StringRef getDriverModeArg() const { +switch (std::get<1>(GetParam())) { +case DriverMode::None: return StringRef(); +case DriverMode::GCC: return "--driver-mode=gcc"; +case DriverMode::CL: return "--driver-mode=cl"; +default: llvm_unreachable("Invalid DriverMode"); +} + } + // Adds an entry to the underlying compilation database. // A flag is injected: -D , so the command used can be identified. void add(llvm::StringRef File, llvm::StringRef Flags = "") { -llvm::SmallVector Argv = {"clang", File, "-D", File}; +llvm::SmallVector Argv; +Argv.push_back(getClangExe()); +if (!getDriverModeArg().empty()) + Argv.push_back(getDriverModeArg()); +Argv.append({"-D", File}); llvm::SplitString(Flags, Argv); + llvm::SmallString<32> Dir; llvm::sys::path::system_temp_directory(false, Dir); + Entries[path(File)].push_back( {Dir, path(File), {Argv.begin(), Argv.end()}, "foo.o"}); } @@ -675,67 +712,101 @@ ->getCompileCommands(path(F)); if (Results.empty()) return "none"; -// drop the input file argument, so tests don't have to deal with path(). -EXPECT_EQ(Results[0].CommandLine.back(), path(F)) -<< "Last arg should be the file"; -Results[0].CommandLine.pop_back(); -return llvm::join(Results[0].CommandLine, " "); + +ArrayRef CmdLine = Results[0].CommandLine; + +// Drop the clang executable path. +EXPECT_EQ(CmdLine.front(), getClangExe()) << "First arg should be clang"; +CmdLine = CmdLine.drop_front(); + +// Drop the --driver-mode flag. +if (!getDriverModeArg().empty()) { + EXPECT_EQ(CmdLine.front(), getDriverModeArg()) + << "Expected driver mode arg"; + CmdLine = CmdLine.drop_front(); +} + +// Drop the input file. +EXPECT_EQ(CmdLine.back(), path(F)) << "Last arg should be the file"; +CmdLine = CmdLine.drop_back(); + +return llvm::join(CmdLine, " "); } MemCDB::EntryMap Entries; }; -TEST_F(InterpolateTest, Nearby) { +TEST_P(InterpolateTest, Nearby) { add("dir/foo.cpp"); add("dir/bar.cpp"); add("an/other/foo.cpp"); // great: dir and name both match (prefix or full, case insensitive) - EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp"); - EXPECT_EQ(getCommand("dir/FOO.cpp"), "clang -D dir/foo.cpp"); + EXPECT_EQ(getCommand("dir/f.cpp"), "-D dir/foo.cpp"); + EXPECT_EQ(getCommand("dir/FOO.cpp"), "-D dir/foo.cpp"); // no name match. prefer matching dir, break ties by alpha - EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp"); + EXPECT_EQ(getCommand("dir/a.cpp"), "-D dir/bar.cpp"); // an exact name match beats one segment of directory match EXPECT_EQ(getCommand("some/other/bar.h"), -"clang -D dir/bar.cpp -x c++-header"); +isTestingCL() +? "-D dir/bar.cpp /TP" +: "-D dir/bar.cpp -x c++-header"); // two segments of directory match beat a prefix name match - EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp"); + EXPECT_EQ(getCommand("an/other/b.cpp"), "-D an/other/foo.cpp"); // if nothing matches at all, we still get the closest alpha match EXPECT_EQ(getCommand("below/some/obscure/path.cpp"), -"clang -D an/other/foo.cpp"); +"-D an/other/foo.cpp"); } -TEST_F(InterpolateTest, Language) { - add("dir/foo.cpp"
[PATCH] D51321: [Tooling] Improve handling of CL-style options
rnk added inline comments. Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:182-184 + if (Arg->getOption().matches(driver::options::OPT_driver_mode)) { +// Process --driver-mode. +IsCLMode = std::strcmp(Arg->getValue(), "cl") == 0; I think you need to do this up front, otherwise you'll parse some args in non-cl mode and some in cl mode. Repository: rC Clang https://reviews.llvm.org/D51321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D51321: [Tooling] Improve handling of CL-style options
hamzasood created this revision. hamzasood added reviewers: rnk, hokein, sammccall. Herald added a subscriber: cfe-commits. This patch fixes the handling of clang-cl options in `InterpolatingCompilationDatabase`. They were previously ignored completely, which led to a lot of bugs: - Additional options were being added with the wrong syntax. E.g. a file was specified as C++ by adding `-x c++`, which causes an error in CL mode. - The args were parsed and then rendered, which means that the aliasing information was lost. E.g. `/W3` was rendered to `-Wall`, which in CL mode means `-Weverything`. - CL options were ignored when checking things like `-std=`, so a lot of logic was being bypassed. Repository: rC Clang https://reviews.llvm.org/D51321 Files: lib/Tooling/InterpolatingCompilationDatabase.cpp unittests/Tooling/CompilationDatabaseTest.cpp Index: unittests/Tooling/CompilationDatabaseTest.cpp === --- unittests/Tooling/CompilationDatabaseTest.cpp +++ unittests/Tooling/CompilationDatabaseTest.cpp @@ -644,12 +644,15 @@ } }; -class InterpolateTest : public ::testing::Test { +class InterpolateTest : public ::testing::TestWithParam { protected: + bool isTestingCL() const { return GetParam(); } + StringRef getClangExe() const { return isTestingCL() ? "clang-cl" : "clang"; } + // Adds an entry to the underlying compilation database. // A flag is injected: -D , so the command used can be identified. void add(llvm::StringRef File, llvm::StringRef Flags = "") { -llvm::SmallVector Argv = {"clang", File, "-D", File}; +llvm::SmallVector Argv = {getClangExe(), File, "-D", File}; llvm::SplitString(Flags, Argv); llvm::SmallString<32> Dir; llvm::sys::path::system_temp_directory(false, Dir); @@ -675,67 +678,92 @@ ->getCompileCommands(path(F)); if (Results.empty()) return "none"; -// drop the input file argument, so tests don't have to deal with path(). -EXPECT_EQ(Results[0].CommandLine.back(), path(F)) -<< "Last arg should be the file"; -Results[0].CommandLine.pop_back(); -return llvm::join(Results[0].CommandLine, " "); + +ArrayRef CmdLine = Results[0].CommandLine; + +// drop the clang path, so tests don't have to deal with getClangExe(). +EXPECT_EQ(CmdLine.front(), getClangExe()) << "First arg should be clang"; +CmdLine = CmdLine.drop_front(); + +// Drop the input file argument, so tests don't have to deal with path(). +EXPECT_EQ(CmdLine.back(), path(F)) << "Last arg should be the file"; +CmdLine = CmdLine.drop_back(); + +return llvm::join(CmdLine, " "); } MemCDB::EntryMap Entries; }; -TEST_F(InterpolateTest, Nearby) { +TEST_P(InterpolateTest, Nearby) { add("dir/foo.cpp"); add("dir/bar.cpp"); add("an/other/foo.cpp"); // great: dir and name both match (prefix or full, case insensitive) - EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp"); - EXPECT_EQ(getCommand("dir/FOO.cpp"), "clang -D dir/foo.cpp"); + EXPECT_EQ(getCommand("dir/f.cpp"), "-D dir/foo.cpp"); + EXPECT_EQ(getCommand("dir/FOO.cpp"), "-D dir/foo.cpp"); // no name match. prefer matching dir, break ties by alpha - EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp"); + EXPECT_EQ(getCommand("dir/a.cpp"), "-D dir/bar.cpp"); // an exact name match beats one segment of directory match EXPECT_EQ(getCommand("some/other/bar.h"), -"clang -D dir/bar.cpp -x c++-header"); +isTestingCL() +? "-D dir/bar.cpp /TP" +: "-D dir/bar.cpp -x c++-header"); // two segments of directory match beat a prefix name match - EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp"); + EXPECT_EQ(getCommand("an/other/b.cpp"), "-D an/other/foo.cpp"); // if nothing matches at all, we still get the closest alpha match EXPECT_EQ(getCommand("below/some/obscure/path.cpp"), -"clang -D an/other/foo.cpp"); +"-D an/other/foo.cpp"); } -TEST_F(InterpolateTest, Language) { - add("dir/foo.cpp", "-std=c++17"); - add("dir/baz.cee", "-x c"); +TEST_P(InterpolateTest, Language) { + add("dir/foo.cpp", isTestingCL() ? "/std:c++17" : "-std=c++17"); + add("dir/baz.cee", isTestingCL() ? "/TC" : "-x c"); // .h is ambiguous, so we add explicit language flags EXPECT_EQ(getCommand("foo.h"), -"clang -D dir/foo.cpp -x c++-header -std=c++17"); +isTestingCL() +? "-D dir/foo.cpp /TP /std:c++17" +: "-D dir/foo.cpp -x c++-header -std=c++17"); // and don't add -x if the inferred language is correct. - EXPECT_EQ(getCommand("foo.hpp"), "clang -D dir/foo.cpp -std=c++17"); + EXPECT_EQ(getCommand("foo.hpp"), +isTestingCL() +? "-D dir/foo.cpp /std:c++17" +: "-D dir/foo.cpp -std=c++17"); // respect -x if it's already there. - EXPECT_EQ(getCommand("baz.h"), "clang -