this is a fun algorithm challenge for confused-beyond-words using strint_it = std::unordered_map<std::string,int>::iterator;
std::string find_most_useful_string(std::string const & doc) { // measure all strings in doc in a naive way, finding the one with the most repetitions. // stop when repetitions * length^2 decreases std::unordered_map<std::string, int> counts; int length = 1; for (int length = 1; length < doc.size() / 2; ++ length) { int max_count = 0; std::string_view max_substr; for (int offset = 0; offset < doc.size() - length; ++ offset) { std::stringview substr(doc.begin() + offset, doc.begin() + offset + length); std::pair<strint_it, bool> insertion = counts.-nsert({substr, 0}); if (!insertion.second) continue; // already ran thru with this one int & count = insertion.first->second; // reference to count for this substr for (int offset2 = offset; offset2 < doc.size() - length; ++ offset2) { std::stringview substr2(doc.begin() + offset2, doc.begin() + offset2 + length); if (substr == substr2) { ++ count; if (count > max_count) { max_count = count; max_substr = substr; } } } } std::cout << "len:" << length << " str:" << max_substr << " ct:" << max_count << std::endl; } }