[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
kbobyrev updated this revision to Diff 167267. kbobyrev added a comment. Pass data from I/O to `readIndexFile(StringRef)`. https://reviews.llvm.org/D52047 Files: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp Index: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp === --- clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp +++ clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp @@ -12,6 +12,7 @@ #include "benchmark/benchmark.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Regex.h" #include @@ -33,6 +34,25 @@ return loadIndex(IndexFilename, {}, true); } +SymbolSlab loadSlab() { + auto Buffer = llvm::MemoryBuffer::getFile(IndexFilename); + if (!Buffer) { +llvm::errs() << "Can't open " << IndexFilename << '\n'; +exit(1); + } + + auto Slab = readIndexFile((*Buffer)->getBuffer()); + if (!Slab) { +llvm::errs() << llvm::toString(Slab.takeError()) << '\n'; +exit(1); + } else if (!Slab->Symbols) { +llvm::errs() << "Couldn't read symbol slab from the index file: " + << IndexFilename << '\n'; +exit(1); + } + return std::move(*Slab->Symbols); +} + // Reads JSON array of serialized FuzzyFindRequest's from user-provided file. std::vector extractQueriesFromLogs() { std::ifstream InputStream(RequestsFilename); @@ -73,24 +93,81 @@ for (const auto : Requests) Mem->fuzzyFind(Request, [](const Symbol ) {}); } -BENCHMARK(MemQueries); +BENCHMARK(MemQueries)->Unit(benchmark::kMillisecond); static void DexQueries(benchmark::State ) { const auto Dex = buildDex(); const auto Requests = extractQueriesFromLogs(); for (auto _ : State) for (const auto : Requests) Dex->fuzzyFind(Request, [](const Symbol ) {}); } -BENCHMARK(DexQueries); +BENCHMARK(DexQueries)->Unit(benchmark::kMillisecond); + +// This is not a *real* benchmark: it shows size of built MemIndex (in bytes). +// Same for the next "benchmark". +// FIXME(kbobyrev): Track memory usage caused by different index parts: +// SymbolSlab and RefSlabs, InverseIndex, PostingLists of different types for +// Dex, etc. +static void MemSize(benchmark::State ) { + const auto Mem = buildMem(); + for (auto _ : State) +// Divide size of Mem by 1000 so that it will be correctly displayed in the +// benchmark report (possible options for time units are ms, ns and us). +State.SetIterationTime(/*double Seconds=*/Mem->estimateMemoryUsage() / + 1000.0); + State.SetLabel("This tracks in-memory size of MemIndex (SymbolSlab + Index " + "overhead) in bytes"); +} +BENCHMARK(MemSize) +->UseManualTime() +->Unit(benchmark::kMillisecond) +->Iterations(1); + +static void DexSize(benchmark::State ) { + const auto Dex = buildDex(); + + for (auto _ : State) +State.SetIterationTime(Dex->estimateMemoryUsage() / 1000.0); + State.SetLabel("This tracks in-memory size of Dex (SymbolSlab + Index " + "overhead) in bytes"); +} +BENCHMARK(DexSize) +->UseManualTime() +->Unit(benchmark::kMillisecond) +->Iterations(1); + +static void DexOverhead(benchmark::State ) { + const auto Slab = loadSlab(); + const auto Dex = buildDex(); + for (auto _ : State) +State.SetIterationTime((Dex->estimateMemoryUsage() - Slab.bytes()) / + 1000.0); + State.SetLabel("This tracks in-memory size of Dex Index overhead (and " + "excludes underlying SymbolSlab size) in bytes"); +} +BENCHMARK(DexOverhead) +->UseManualTime() +->Unit(benchmark::kMillisecond) +->Iterations(1); + +static void SymbolSlabLoading(benchmark::State ) { + for (auto _ : State) +benchmark::DoNotOptimize(loadSlab()); +} +BENCHMARK(SymbolSlabLoading)->Unit(benchmark::kMillisecond); + +static void DexBuild(benchmark::State ) { + auto Slab = loadSlab(); + for (auto _ : State) +benchmark::DoNotOptimize(dex::Dex::build(std::move(Slab), {})); +} +BENCHMARK(DexBuild)->Unit(benchmark::kMillisecond); } // namespace } // namespace clangd } // namespace clang -// FIXME(kbobyrev): Add index building time benchmarks. -// FIXME(kbobyrev): Add memory consumption "benchmarks" by manually measuring -// in-memory index size and reporting it as time. // FIXME(kbobyrev): Create a logger wrapper to suppress debugging info printer. int main(int argc, char *argv[]) { if (argc < 3) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
kbobyrev added inline comments. Comment at: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp:96 -static void DexQueries(benchmark::State ) { +// This is not a *real* benchmark: it shows size of built MemIndex (in bytes). +// Same for the next "benchmark". sammccall wrote: > This looks like it'll be really stable, why does it need to be a benchmark vs > say a dexp subcommand? As discussed offline, this is meant to make it easier for people to investigate memory + performance changes and simplify the development pipeline as opposed to remembering multiple binaries and their options and running all these binaries after each change. https://reviews.llvm.org/D52047 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
kbobyrev updated this revision to Diff 167067. kbobyrev marked 2 inline comments as done. kbobyrev added a comment. Don't include misc changes elsewhere: focus on adding more benchmarks in this revision. https://reviews.llvm.org/D52047 Files: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp Index: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp === --- clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp +++ clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp @@ -33,6 +33,19 @@ return loadIndex(IndexFilename, {}, true); } +SymbolSlab loadSlab() { + auto IndexFile = readIndexFile(IndexFilename); + if (!IndexFile) { +llvm::errs() << llvm::toString(IndexFile.takeError()) << '\n'; +exit(1); + } else if (!IndexFile->Symbols) { +llvm::errs() << "Couldn't read symbol slab from the index file: " + << IndexFilename << '\n'; +exit(1); + } + return std::move(*IndexFile->Symbols); +} + // Reads JSON array of serialized FuzzyFindRequest's from user-provided file. std::vector extractQueriesFromLogs() { std::ifstream InputStream(RequestsFilename); @@ -73,24 +86,81 @@ for (const auto : Requests) Mem->fuzzyFind(Request, [](const Symbol ) {}); } -BENCHMARK(MemQueries); +BENCHMARK(MemQueries)->Unit(benchmark::kMillisecond); static void DexQueries(benchmark::State ) { const auto Dex = buildDex(); const auto Requests = extractQueriesFromLogs(); for (auto _ : State) for (const auto : Requests) Dex->fuzzyFind(Request, [](const Symbol ) {}); } -BENCHMARK(DexQueries); +BENCHMARK(DexQueries)->Unit(benchmark::kMillisecond); + +// This is not a *real* benchmark: it shows size of built MemIndex (in bytes). +// Same for the next "benchmark". +// FIXME(kbobyrev): Track memory usage caused by different index parts: +// SymbolSlab and RefSlabs, InverseIndex, PostingLists of different types for +// Dex, etc. +static void MemSize(benchmark::State ) { + const auto Mem = buildMem(); + for (auto _ : State) +// Divide size of Mem by 1000 so that it will be correctly displayed in the +// benchmark report (possible options for time units are ms, ns and us). +State.SetIterationTime(/*double Seconds=*/Mem->estimateMemoryUsage() / + 1000.0); + State.SetLabel("This tracks in-memory size of MemIndex (SymbolSlab + Index " + "overhead) in bytes"); +} +BENCHMARK(MemSize) +->UseManualTime() +->Unit(benchmark::kMillisecond) +->Iterations(1); + +static void DexSize(benchmark::State ) { + const auto Dex = buildDex(); + + for (auto _ : State) +State.SetIterationTime(Dex->estimateMemoryUsage() / 1000.0); + State.SetLabel("This tracks in-memory size of Dex (SymbolSlab + Index " + "overhead) in bytes"); +} +BENCHMARK(DexSize) +->UseManualTime() +->Unit(benchmark::kMillisecond) +->Iterations(1); + +static void DexOverhead(benchmark::State ) { + const auto Slab = loadSlab(); + const auto Dex = buildDex(); + for (auto _ : State) +State.SetIterationTime((Dex->estimateMemoryUsage() - Slab.bytes()) / + 1000.0); + State.SetLabel("This tracks in-memory size of Dex Index overhead (and " + "excludes underlying SymbolSlab size) in bytes"); +} +BENCHMARK(DexOverhead) +->UseManualTime() +->Unit(benchmark::kMillisecond) +->Iterations(1); + +static void SymbolSlabLoading(benchmark::State ) { + for (auto _ : State) +benchmark::DoNotOptimize(loadSlab()); +} +BENCHMARK(SymbolSlabLoading)->Unit(benchmark::kMillisecond); + +static void DexBuild(benchmark::State ) { + auto Slab = loadSlab(); + for (auto _ : State) +benchmark::DoNotOptimize(dex::Dex::build(std::move(Slab), {})); +} +BENCHMARK(DexBuild)->Unit(benchmark::kMillisecond); } // namespace } // namespace clangd } // namespace clang -// FIXME(kbobyrev): Add index building time benchmarks. -// FIXME(kbobyrev): Add memory consumption "benchmarks" by manually measuring -// in-memory index size and reporting it as time. // FIXME(kbobyrev): Create a logger wrapper to suppress debugging info printer. int main(int argc, char *argv[]) { if (argc < 3) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
sammccall requested changes to this revision. sammccall added a comment. This revision now requires changes to proceed. This change does several things, and I think most of them need further thought. Can we discuss Monday? Comment at: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp:96 -static void DexQueries(benchmark::State ) { +// This is not a *real* benchmark: it shows size of built MemIndex (in bytes). +// Same for the next "benchmark". This looks like it'll be really stable, why does it need to be a benchmark vs say a dexp subcommand? https://reviews.llvm.org/D52047 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
ioeric added inline comments. Comment at: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp:78 +static void DexQueries(benchmark::State ) { + const auto Dex = buildDex(); Why did we move this benchmark (`DexQueries`)? It seems unnecessary. Comment at: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp:106 +// benchmark report (possible options for time units are ms, ns and us). +State.SetIterationTime(/*double Seconds=*/Mem->estimateMemoryUsage() / + 1000); nit: maybe divide by `1000.0` to avoid precision loss? Comment at: clang-tools-extra/clangd/index/SymbolYAML.cpp:187 +llvm::Expected symbolsFromFile(llvm::StringRef SymbolFilename) { + auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); Could you put this near the old `loadIndex` to preserve the revision history? https://reviews.llvm.org/D52047 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
kbobyrev updated this revision to Diff 166481. kbobyrev marked 3 inline comments as done. kbobyrev added a comment. - Be more explicit about the nature of "benchmarks" with memory tracking logic via `State::SetLabel(...)`. - Force single iteration for "benchmarks" with memory usage tracking - Add another "benchmark" with `Dex` memory overhead over plain `SymbolSlab` Huge thanks to @lebedev.ri for helping! This looks much better to me now. https://reviews.llvm.org/D52047 Files: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp clang-tools-extra/clangd/index/Serialization.cpp clang-tools-extra/clangd/index/SymbolYAML.cpp clang-tools-extra/clangd/index/SymbolYAML.h clang-tools-extra/clangd/index/dex/Dex.cpp Index: clang-tools-extra/clangd/index/dex/Dex.cpp === --- clang-tools-extra/clangd/index/dex/Dex.cpp +++ clang-tools-extra/clangd/index/dex/Dex.cpp @@ -124,9 +124,6 @@ for (const auto : TempInvertedIndex) InvertedIndex.insert({TokenToPostingList.first, PostingList(move(TokenToPostingList.second))}); - - vlog("Built Dex with estimated memory usage {0} bytes.", - estimateMemoryUsage()); } /// Constructs iterators over tokens extracted from the query and exhausts it @@ -238,8 +235,9 @@ Bytes += SymbolQuality.size() * sizeof(float); Bytes += LookupTable.getMemorySize(); Bytes += InvertedIndex.getMemorySize(); - for (const auto : InvertedIndex) -Bytes += P.second.bytes(); + for (const auto : InvertedIndex) +Bytes += TokenAndPostingList.first.Data.size() + + TokenAndPostingList.second.bytes(); return Bytes + BackingDataSize; } Index: clang-tools-extra/clangd/index/SymbolYAML.h === --- clang-tools-extra/clangd/index/SymbolYAML.h +++ clang-tools-extra/clangd/index/SymbolYAML.h @@ -29,6 +29,9 @@ // Read symbols from a YAML-format string. SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent); +// Read symbols from YAML or RIFF file. +llvm::Expected symbolsFromFile(llvm::StringRef SymbolFilename); + // Read one symbol from a YAML-stream. // The returned symbol is backed by Input. Symbol SymbolFromYAML(llvm::yaml::Input ); Index: clang-tools-extra/clangd/index/SymbolYAML.cpp === --- clang-tools-extra/clangd/index/SymbolYAML.cpp +++ clang-tools-extra/clangd/index/SymbolYAML.cpp @@ -9,6 +9,7 @@ #include "SymbolYAML.h" #include "Index.h" +#include "Logger.h" #include "Serialization.h" #include "Trace.h" #include "dex/Dex.h" @@ -172,6 +173,7 @@ namespace clangd { SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent) { + trace::Span Tracer("ParseYAML"); llvm::yaml::Input Yin(YAMLContent); std::vector S; Yin >> S; @@ -182,6 +184,24 @@ return std::move(Syms).build(); } +llvm::Expected symbolsFromFile(llvm::StringRef SymbolFilename) { + auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); + if (!Buffer) +return llvm::make_error( +("Can't open " + SymbolFilename).str(), llvm::errc::invalid_argument); + + StringRef Data = Buffer->get()->getBuffer(); + + if (!Data.startswith("RIFF")) { // Magic for binary index file. +return symbolsFromYAML(Data); + } + auto RIFF = readIndexFile(Data); + return RIFF ? llvm::Expected(std::move(*RIFF->Symbols)) + : llvm::make_error( +("Can't open " + SymbolFilename).str(), +llvm::errc::invalid_argument); +} + Symbol SymbolFromYAML(llvm::yaml::Input ) { Symbol S; Input >> S; @@ -206,30 +226,16 @@ llvm::ArrayRef URISchemes, bool UseDex) { trace::Span OverallTracer("LoadIndex"); - auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); - if (!Buffer) { -llvm::errs() << "Can't open " << SymbolFilename << "\n"; -return nullptr; - } - StringRef Data = Buffer->get()->getBuffer(); - - llvm::Optional Slab; - if (Data.startswith("RIFF")) { // Magic for binary index file. -trace::Span Tracer("ParseRIFF"); -if (auto RIFF = readIndexFile(Data)) - Slab = std::move(RIFF->Symbols); -else - llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n"; - } else { -trace::Span Tracer("ParseYAML"); -Slab = symbolsFromYAML(Data); - } - + auto Slab = symbolsFromFile(SymbolFilename); if (!Slab) return nullptr; trace::Span Tracer("BuildIndex"); - return UseDex ? dex::Dex::build(std::move(*Slab), URISchemes) -: MemIndex::build(std::move(*Slab), RefSlab()); + auto Index = UseDex ? dex::Dex::build(std::move(*Slab), URISchemes) + : MemIndex::build(std::move(*Slab), RefSlab()); + vlog("Loaded {0} from {1} with estimated memory usage {2}", + UseDex ? "Dex" : "MemIndex", SymbolFilename, +
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
lebedev.ri added inline comments. Comment at: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp:81 +// Same for the next "benchmark". +// FIXME(kbobyrev): Should this be separated into the BackingMemorySize +// (underlying SymbolSlab size) and Symbol Index (MemIndex/Dex) overhead? kbobyrev wrote: > ilya-biryukov wrote: > > Given the trick we use for display, how are we going to show **two** memory > > uses? > As discussed offline, this hack also relies on the fact that benchmark has a > dynamic nature of determining iterations count. Giving a large number of > "time units" to the counter results in a single iteration. > > I've tried to understand whether I could use any flags for [[ > https://github.com/google/benchmark#user-defined-counters | User-Defined > Counter ]] that could just divide the number of iterations by > `IterationTime`, but I could find one that would do exactly what is needed > here. Also, I didn't find any way to manually set the iteration count. > divide the number of iterations by IterationTime And more unsolicited advices: [[ https://github.com/google/benchmark/blob/1b44120cd16712f3b5decd95dc8ff2813574b273/include/benchmark/benchmark.h#L366-L368 | `kIsIterationInvariantRate` ]], but it is master-only, not in any release. For now, do ``` State.counters["kIsIterationInvariantRate"] = benchmark::Counter( state.iterations(), benchmark::Counter::Flags::kIsRate); ``` If understood the question right. > Also, I didn't find any way to manually set the iteration count. [[ https://github.com/google/benchmark/blob/1b44120cd16712f3b5decd95dc8ff2813574b273/include/benchmark/benchmark.h#L853-L859 | `benchmark::Benchmark::Iterations()` ]] https://reviews.llvm.org/D52047 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
kbobyrev added inline comments. Comment at: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp:81 +// Same for the next "benchmark". +// FIXME(kbobyrev): Should this be separated into the BackingMemorySize +// (underlying SymbolSlab size) and Symbol Index (MemIndex/Dex) overhead? ilya-biryukov wrote: > Given the trick we use for display, how are we going to show **two** memory > uses? As discussed offline, this hack also relies on the fact that benchmark has a dynamic nature of determining iterations count. Giving a large number of "time units" to the counter results in a single iteration. I've tried to understand whether I could use any flags for [[ https://github.com/google/benchmark#user-defined-counters | User-Defined Counter ]] that could just divide the number of iterations by `IterationTime`, but I could find one that would do exactly what is needed here. Also, I didn't find any way to manually set the iteration count. Comment at: clang-tools-extra/clangd/index/dex/Dex.cpp:239 for (const auto : InvertedIndex) -Bytes += P.second.bytes(); +Bytes += P.first.Data.size() + P.second.bytes(); return Bytes + BackingDataSize; ilya-biryukov wrote: > Just to clarify: `P.first.Data.size()` is the size of the arena for all the > symbols? `P` is `std::pair` here, so that would be `Token.Data.size()` which is the size of `std::string` stored in Token (e.g. trigram symbols for `Trigram` tokens, directory URI for `ProximityPath` tokens, etc). `P` is probably bad here, I'll change the naming to be more explicit. https://reviews.llvm.org/D52047 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
kbobyrev updated this revision to Diff 166462. kbobyrev marked 4 inline comments as done. kbobyrev added a comment. Use `llvm::Expected`, cleanup the patch. https://reviews.llvm.org/D52047 Files: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp clang-tools-extra/clangd/index/Serialization.cpp clang-tools-extra/clangd/index/SymbolYAML.cpp clang-tools-extra/clangd/index/SymbolYAML.h clang-tools-extra/clangd/index/dex/Dex.cpp Index: clang-tools-extra/clangd/index/dex/Dex.cpp === --- clang-tools-extra/clangd/index/dex/Dex.cpp +++ clang-tools-extra/clangd/index/dex/Dex.cpp @@ -124,9 +124,6 @@ for (const auto : TempInvertedIndex) InvertedIndex.insert({TokenToPostingList.first, PostingList(move(TokenToPostingList.second))}); - - vlog("Built Dex with estimated memory usage {0} bytes.", - estimateMemoryUsage()); } /// Constructs iterators over tokens extracted from the query and exhausts it @@ -238,8 +235,9 @@ Bytes += SymbolQuality.size() * sizeof(float); Bytes += LookupTable.getMemorySize(); Bytes += InvertedIndex.getMemorySize(); - for (const auto : InvertedIndex) -Bytes += P.second.bytes(); + for (const auto : InvertedIndex) +Bytes += TokenAndPostingList.first.Data.size() + + TokenAndPostingList.second.bytes(); return Bytes + BackingDataSize; } Index: clang-tools-extra/clangd/index/SymbolYAML.h === --- clang-tools-extra/clangd/index/SymbolYAML.h +++ clang-tools-extra/clangd/index/SymbolYAML.h @@ -29,6 +29,9 @@ // Read symbols from a YAML-format string. SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent); +// Read symbols from YAML or RIFF file. +llvm::Expected symbolsFromFile(llvm::StringRef SymbolFilename); + // Read one symbol from a YAML-stream. // The returned symbol is backed by Input. Symbol SymbolFromYAML(llvm::yaml::Input ); Index: clang-tools-extra/clangd/index/SymbolYAML.cpp === --- clang-tools-extra/clangd/index/SymbolYAML.cpp +++ clang-tools-extra/clangd/index/SymbolYAML.cpp @@ -9,6 +9,7 @@ #include "SymbolYAML.h" #include "Index.h" +#include "Logger.h" #include "Serialization.h" #include "Trace.h" #include "dex/Dex.h" @@ -172,6 +173,7 @@ namespace clangd { SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent) { + trace::Span Tracer("ParseYAML"); llvm::yaml::Input Yin(YAMLContent); std::vector S; Yin >> S; @@ -182,6 +184,24 @@ return std::move(Syms).build(); } +llvm::Expected symbolsFromFile(llvm::StringRef SymbolFilename) { + auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); + if (!Buffer) +return llvm::make_error( +("Can't open " + SymbolFilename).str(), llvm::errc::invalid_argument); + + StringRef Data = Buffer->get()->getBuffer(); + + if (!Data.startswith("RIFF")) { // Magic for binary index file. +return symbolsFromYAML(Data); + } + auto RIFF = readIndexFile(Data); + return RIFF ? llvm::Expected(std::move(*RIFF->Symbols)) + : llvm::make_error( +("Can't open " + SymbolFilename).str(), +llvm::errc::invalid_argument); +} + Symbol SymbolFromYAML(llvm::yaml::Input ) { Symbol S; Input >> S; @@ -206,30 +226,16 @@ llvm::ArrayRef URISchemes, bool UseDex) { trace::Span OverallTracer("LoadIndex"); - auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); - if (!Buffer) { -llvm::errs() << "Can't open " << SymbolFilename << "\n"; -return nullptr; - } - StringRef Data = Buffer->get()->getBuffer(); - - llvm::Optional Slab; - if (Data.startswith("RIFF")) { // Magic for binary index file. -trace::Span Tracer("ParseRIFF"); -if (auto RIFF = readIndexFile(Data)) - Slab = std::move(RIFF->Symbols); -else - llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n"; - } else { -trace::Span Tracer("ParseYAML"); -Slab = symbolsFromYAML(Data); - } - + auto Slab = symbolsFromFile(SymbolFilename); if (!Slab) return nullptr; trace::Span Tracer("BuildIndex"); - return UseDex ? dex::Dex::build(std::move(*Slab), URISchemes) -: MemIndex::build(std::move(*Slab), RefSlab()); + auto Index = UseDex ? dex::Dex::build(std::move(*Slab), URISchemes) + : MemIndex::build(std::move(*Slab), RefSlab()); + vlog("Loaded {0} from {1} with estimated memory usage {2}", + UseDex ? "Dex" : "MemIndex", SymbolFilename, + Index->estimateMemoryUsage()); + return Index; } } // namespace clangd Index: clang-tools-extra/clangd/index/Serialization.cpp === --- clang-tools-extra/clangd/index/Serialization.cpp +++
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
ilya-biryukov added a comment. Also not sure about the trick: - Would be surprising to see the "ms" instead of "mbytes" - Time tends to vary between runs, so using Google Benchmark's capabilities to run multiple times and report aggregated times seems useful. Not so much for the memory usage: it's deterministic and running multiple times is just a waste of time. I wonder if a separate (and trivial) C++ binary that would load the index and report the index size is any worse than the benchmark hack? What are the pros of the latter? Comment at: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp:81 +// Same for the next "benchmark". +// FIXME(kbobyrev): Should this be separated into the BackingMemorySize +// (underlying SymbolSlab size) and Symbol Index (MemIndex/Dex) overhead? Given the trick we use for display, how are we going to show **two** memory uses? Comment at: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp:154 ::benchmark::RunSpecifiedBenchmarks(); + return 0; } Since `return 0` is implied for main in C++, there's no need to add one. May add clarity though, so feel free to keep it! Comment at: clang-tools-extra/clangd/index/SymbolYAML.cpp:189 + if (!Buffer) { +llvm::errs() << "Can't open " << SymbolFilename << "\n"; +return llvm::None; Return `llvm::Expected` instead of `Optional` and create errors with the specified text instead. This is a slightly better option for the library code (callers can report errors in various ways, e.g. one can imagine reporting it in the LSP instead of stderr) Comment at: clang-tools-extra/clangd/index/SymbolYAML.cpp:200 +else + llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n"; + } else { Same here: just propagate the error to the caller. Comment at: clang-tools-extra/clangd/index/dex/Dex.cpp:239 for (const auto : InvertedIndex) -Bytes += P.second.bytes(); +Bytes += P.first.Data.size() + P.second.bytes(); return Bytes + BackingDataSize; Just to clarify: `P.first.Data.size()` is the size of the arena for all the symbols? https://reviews.llvm.org/D52047 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
kbobyrev updated this revision to Diff 166271. kbobyrev added a comment. Remove `BuildMem` benchmark, which collects data about `MemIndex` building time (which is essentially nothing and therefore not really interesting). https://reviews.llvm.org/D52047 Files: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp clang-tools-extra/clangd/index/SymbolYAML.cpp clang-tools-extra/clangd/index/SymbolYAML.h clang-tools-extra/clangd/index/dex/Dex.cpp Index: clang-tools-extra/clangd/index/dex/Dex.cpp === --- clang-tools-extra/clangd/index/dex/Dex.cpp +++ clang-tools-extra/clangd/index/dex/Dex.cpp @@ -124,9 +124,6 @@ for (const auto : TempInvertedIndex) InvertedIndex.insert({TokenToPostingList.first, PostingList(move(TokenToPostingList.second))}); - - vlog("Built Dex with estimated memory usage {0} bytes.", - estimateMemoryUsage()); } /// Constructs iterators over tokens extracted from the query and exhausts it @@ -239,7 +236,7 @@ Bytes += LookupTable.getMemorySize(); Bytes += InvertedIndex.getMemorySize(); for (const auto : InvertedIndex) -Bytes += P.second.bytes(); +Bytes += P.first.Data.size() + P.second.bytes(); return Bytes + BackingDataSize; } Index: clang-tools-extra/clangd/index/SymbolYAML.h === --- clang-tools-extra/clangd/index/SymbolYAML.h +++ clang-tools-extra/clangd/index/SymbolYAML.h @@ -29,6 +29,9 @@ // Read symbols from a YAML-format string. SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent); +// Read symbols from YAML or RIFF file. +llvm::Optional symbolsFromFile(llvm::StringRef SymbolFilename); + // Read one symbol from a YAML-stream. // The returned symbol is backed by Input. Symbol SymbolFromYAML(llvm::yaml::Input ); Index: clang-tools-extra/clangd/index/SymbolYAML.cpp === --- clang-tools-extra/clangd/index/SymbolYAML.cpp +++ clang-tools-extra/clangd/index/SymbolYAML.cpp @@ -9,6 +9,7 @@ #include "SymbolYAML.h" #include "Index.h" +#include "Logger.h" #include "Serialization.h" #include "Trace.h" #include "dex/Dex.h" @@ -182,6 +183,28 @@ return std::move(Syms).build(); } +llvm::Optional symbolsFromFile(llvm::StringRef SymbolFilename) { + auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); + if (!Buffer) { +llvm::errs() << "Can't open " << SymbolFilename << "\n"; +return llvm::None; + } + StringRef Data = Buffer->get()->getBuffer(); + + llvm::Optional Slab; + if (Data.startswith("RIFF")) { // Magic for binary index file. +trace::Span Tracer("ParseRIFF"); +if (auto RIFF = readIndexFile(Data)) + Slab = std::move(RIFF->Symbols); +else + llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n"; + } else { +trace::Span Tracer("ParseYAML"); +Slab = symbolsFromYAML(Data); + } + return Slab; +} + Symbol SymbolFromYAML(llvm::yaml::Input ) { Symbol S; Input >> S; @@ -206,30 +229,16 @@ llvm::ArrayRef URISchemes, bool UseDex) { trace::Span OverallTracer("LoadIndex"); - auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); - if (!Buffer) { -llvm::errs() << "Can't open " << SymbolFilename << "\n"; -return nullptr; - } - StringRef Data = Buffer->get()->getBuffer(); - - llvm::Optional Slab; - if (Data.startswith("RIFF")) { // Magic for binary index file. -trace::Span Tracer("ParseRIFF"); -if (auto RIFF = readIndexFile(Data)) - Slab = std::move(RIFF->Symbols); -else - llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n"; - } else { -trace::Span Tracer("ParseYAML"); -Slab = symbolsFromYAML(Data); - } - + auto Slab = symbolsFromFile(SymbolFilename); if (!Slab) return nullptr; trace::Span Tracer("BuildIndex"); - return UseDex ? dex::Dex::build(std::move(*Slab), URISchemes) -: MemIndex::build(std::move(*Slab), RefSlab()); + auto Index = UseDex ? dex::Dex::build(std::move(*Slab), URISchemes) + : MemIndex::build(std::move(*Slab), RefSlab()); + vlog("Loaded {0} from {1} with estimated memory usage {2}", + UseDex ? "Dex" : "MemIndex", SymbolFilename, + Index->estimateMemoryUsage()); + return Index; } } // namespace clangd Index: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp === --- clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp +++ clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp @@ -33,6 +33,16 @@ return loadIndex(IndexFilename, {}, true); } +SymbolSlab loadSlab() { + auto Slab = symbolsFromFile(IndexFilename); + if (!Slab) { +llvm::errs() << "Error when loading SymbolSlab from file: " << IndexFilename + << '\n'; +
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
kbobyrev updated this revision to Diff 166269. kbobyrev added a comment. Add benchmark for `SymbolSlab` loading from file. This might be useful for RIFF/YAML symbol loader optimizations. https://reviews.llvm.org/D52047 Files: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp clang-tools-extra/clangd/index/SymbolYAML.cpp clang-tools-extra/clangd/index/SymbolYAML.h clang-tools-extra/clangd/index/dex/Dex.cpp Index: clang-tools-extra/clangd/index/dex/Dex.cpp === --- clang-tools-extra/clangd/index/dex/Dex.cpp +++ clang-tools-extra/clangd/index/dex/Dex.cpp @@ -124,9 +124,6 @@ for (const auto : TempInvertedIndex) InvertedIndex.insert({TokenToPostingList.first, PostingList(move(TokenToPostingList.second))}); - - vlog("Built Dex with estimated memory usage {0} bytes.", - estimateMemoryUsage()); } /// Constructs iterators over tokens extracted from the query and exhausts it @@ -239,7 +236,7 @@ Bytes += LookupTable.getMemorySize(); Bytes += InvertedIndex.getMemorySize(); for (const auto : InvertedIndex) -Bytes += P.second.bytes(); +Bytes += P.first.Data.size() + P.second.bytes(); return Bytes + BackingDataSize; } Index: clang-tools-extra/clangd/index/SymbolYAML.h === --- clang-tools-extra/clangd/index/SymbolYAML.h +++ clang-tools-extra/clangd/index/SymbolYAML.h @@ -29,6 +29,9 @@ // Read symbols from a YAML-format string. SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent); +// Read symbols from YAML or RIFF file. +llvm::Optional symbolsFromFile(llvm::StringRef SymbolFilename); + // Read one symbol from a YAML-stream. // The returned symbol is backed by Input. Symbol SymbolFromYAML(llvm::yaml::Input ); Index: clang-tools-extra/clangd/index/SymbolYAML.cpp === --- clang-tools-extra/clangd/index/SymbolYAML.cpp +++ clang-tools-extra/clangd/index/SymbolYAML.cpp @@ -9,6 +9,7 @@ #include "SymbolYAML.h" #include "Index.h" +#include "Logger.h" #include "Serialization.h" #include "Trace.h" #include "dex/Dex.h" @@ -182,6 +183,28 @@ return std::move(Syms).build(); } +llvm::Optional symbolsFromFile(llvm::StringRef SymbolFilename) { + auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); + if (!Buffer) { +llvm::errs() << "Can't open " << SymbolFilename << "\n"; +return llvm::None; + } + StringRef Data = Buffer->get()->getBuffer(); + + llvm::Optional Slab; + if (Data.startswith("RIFF")) { // Magic for binary index file. +trace::Span Tracer("ParseRIFF"); +if (auto RIFF = readIndexFile(Data)) + Slab = std::move(RIFF->Symbols); +else + llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n"; + } else { +trace::Span Tracer("ParseYAML"); +Slab = symbolsFromYAML(Data); + } + return Slab; +} + Symbol SymbolFromYAML(llvm::yaml::Input ) { Symbol S; Input >> S; @@ -206,30 +229,16 @@ llvm::ArrayRef URISchemes, bool UseDex) { trace::Span OverallTracer("LoadIndex"); - auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); - if (!Buffer) { -llvm::errs() << "Can't open " << SymbolFilename << "\n"; -return nullptr; - } - StringRef Data = Buffer->get()->getBuffer(); - - llvm::Optional Slab; - if (Data.startswith("RIFF")) { // Magic for binary index file. -trace::Span Tracer("ParseRIFF"); -if (auto RIFF = readIndexFile(Data)) - Slab = std::move(RIFF->Symbols); -else - llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n"; - } else { -trace::Span Tracer("ParseYAML"); -Slab = symbolsFromYAML(Data); - } - + auto Slab = symbolsFromFile(SymbolFilename); if (!Slab) return nullptr; trace::Span Tracer("BuildIndex"); - return UseDex ? dex::Dex::build(std::move(*Slab), URISchemes) -: MemIndex::build(std::move(*Slab), RefSlab()); + auto Index = UseDex ? dex::Dex::build(std::move(*Slab), URISchemes) + : MemIndex::build(std::move(*Slab), RefSlab()); + vlog("Loaded {0} from {1} with estimated memory usage {2}", + UseDex ? "Dex" : "MemIndex", SymbolFilename, + Index->estimateMemoryUsage()); + return Index; } } // namespace clangd Index: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp === --- clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp +++ clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp @@ -33,6 +33,16 @@ return loadIndex(IndexFilename, {}, true); } +SymbolSlab loadSlab() { + auto Slab = symbolsFromFile(IndexFilename); + if (!Slab) { +llvm::errs() << "Error when loading SymbolSlab from file: " << IndexFilename + << '\n'; +exit(1); + } + return
[PATCH] D52047: [clangd] Add building benchmark and memory consumption tracking
kbobyrev updated this revision to Diff 166266. kbobyrev retitled this revision from "[clangd] Add a "benchmark" for tracking memory" to "[clangd] Add building benchmark and memory consumption tracking". kbobyrev edited the summary of this revision. kbobyrev added a comment. Add symbol index building benchmarks, split `loadIndex()` into `symbolsFromFile()` and actual index build. https://reviews.llvm.org/D52047 Files: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp clang-tools-extra/clangd/index/SymbolYAML.cpp clang-tools-extra/clangd/index/SymbolYAML.h clang-tools-extra/clangd/index/dex/Dex.cpp Index: clang-tools-extra/clangd/index/dex/Dex.cpp === --- clang-tools-extra/clangd/index/dex/Dex.cpp +++ clang-tools-extra/clangd/index/dex/Dex.cpp @@ -124,9 +124,6 @@ for (const auto : TempInvertedIndex) InvertedIndex.insert({TokenToPostingList.first, PostingList(move(TokenToPostingList.second))}); - - vlog("Built Dex with estimated memory usage {0} bytes.", - estimateMemoryUsage()); } /// Constructs iterators over tokens extracted from the query and exhausts it @@ -239,7 +236,7 @@ Bytes += LookupTable.getMemorySize(); Bytes += InvertedIndex.getMemorySize(); for (const auto : InvertedIndex) -Bytes += P.second.bytes(); +Bytes += P.first.Data.size() + P.second.bytes(); return Bytes + BackingDataSize; } Index: clang-tools-extra/clangd/index/SymbolYAML.h === --- clang-tools-extra/clangd/index/SymbolYAML.h +++ clang-tools-extra/clangd/index/SymbolYAML.h @@ -29,6 +29,9 @@ // Read symbols from a YAML-format string. SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent); +// Read symbols from YAML or RIFF file. +llvm::Optional symbolsFromFile(llvm::StringRef SymbolFilename); + // Read one symbol from a YAML-stream. // The returned symbol is backed by Input. Symbol SymbolFromYAML(llvm::yaml::Input ); Index: clang-tools-extra/clangd/index/SymbolYAML.cpp === --- clang-tools-extra/clangd/index/SymbolYAML.cpp +++ clang-tools-extra/clangd/index/SymbolYAML.cpp @@ -9,6 +9,7 @@ #include "SymbolYAML.h" #include "Index.h" +#include "Logger.h" #include "Serialization.h" #include "Trace.h" #include "dex/Dex.h" @@ -182,6 +183,28 @@ return std::move(Syms).build(); } +llvm::Optional symbolsFromFile(llvm::StringRef SymbolFilename) { + auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); + if (!Buffer) { +llvm::errs() << "Can't open " << SymbolFilename << "\n"; +return llvm::None; + } + StringRef Data = Buffer->get()->getBuffer(); + + llvm::Optional Slab; + if (Data.startswith("RIFF")) { // Magic for binary index file. +trace::Span Tracer("ParseRIFF"); +if (auto RIFF = readIndexFile(Data)) + Slab = std::move(RIFF->Symbols); +else + llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n"; + } else { +trace::Span Tracer("ParseYAML"); +Slab = symbolsFromYAML(Data); + } + return Slab; +} + Symbol SymbolFromYAML(llvm::yaml::Input ) { Symbol S; Input >> S; @@ -206,30 +229,16 @@ llvm::ArrayRef URISchemes, bool UseDex) { trace::Span OverallTracer("LoadIndex"); - auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); - if (!Buffer) { -llvm::errs() << "Can't open " << SymbolFilename << "\n"; -return nullptr; - } - StringRef Data = Buffer->get()->getBuffer(); - - llvm::Optional Slab; - if (Data.startswith("RIFF")) { // Magic for binary index file. -trace::Span Tracer("ParseRIFF"); -if (auto RIFF = readIndexFile(Data)) - Slab = std::move(RIFF->Symbols); -else - llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n"; - } else { -trace::Span Tracer("ParseYAML"); -Slab = symbolsFromYAML(Data); - } - + auto Slab = symbolsFromFile(SymbolFilename); if (!Slab) return nullptr; trace::Span Tracer("BuildIndex"); - return UseDex ? dex::Dex::build(std::move(*Slab), URISchemes) -: MemIndex::build(std::move(*Slab), RefSlab()); + auto Index = UseDex ? dex::Dex::build(std::move(*Slab), URISchemes) + : MemIndex::build(std::move(*Slab), RefSlab()); + vlog("Loaded {0} from {1} with estimated memory usage {2}", + UseDex ? "Dex" : "MemIndex", SymbolFilename, + Index->estimateMemoryUsage()); + return Index; } } // namespace clangd Index: clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp === --- clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp +++ clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp @@ -33,6 +33,16 @@ return loadIndex(IndexFilename, {}, true); } +SymbolSlab loadSlab() { + auto Slab =