this is a fun algorithm challenge for confused-beyond-words

using strint_it = std::unordered_map<std::string,int>::iterator;

std::string find_most_useful_string(std::string const & doc)
{
  // measure all strings in doc in a naive way, finding the one with
the most repetitions.
  // stop when repetitions * length^2 decreases

  std::unordered_map<std::string, int> counts;

  int length = 1;

  for (int length = 1; length < doc.size() / 2; ++ length) {

    int max_count = 0;
    std::string_view max_substr;

    for (int offset = 0; offset < doc.size() - length; ++ offset) {
      std::stringview substr(doc.begin() + offset, doc.begin() + offset +
length);

      std::pair<strint_it, bool> insertion = counts.-nsert({substr, 0});

      if (!insertion.second) continue; // already ran thru with this one

      int & count = insertion.first->second; // reference to count for this
substr

      for (int offset2 = offset; offset2 < doc.size() - length; ++ offset2)
{
        std::stringview substr2(doc.begin() + offset2, doc.begin() +
offset2 + length);
        if (substr == substr2) {
          ++ count;

          if (count > max_count) {
            max_count = count;
            max_substr = substr;
          }
        }
      }
    }
    std::cout << "len:" << length << " str:" << max_substr << " ct:" <<
max_count << std::endl;
  }
}

Reply via email to