With that version, the run time on my file ([enwik8](http://mattmahoney.net/dc/enwik8.zip)) drops to 53s, versus 64s for the Python version. But I can't see the results with that version, and if I save the string slice I will be back to the same problem.
# The python version:
from timeit import default_timer as timer

WORD_SIZE = 12
K = 10000


def window(line, size):
    """Yield every substring of ``line`` that is exactly ``size`` characters long.

    Substrings are produced left to right, advancing one character at a
    time. Nothing is yielded when ``size`` is larger than ``len(line)``.
    """
    for start in range(len(line) - size + 1):
        yield line[start:start + size]


def counter(file, size, k):
    """Approximately count the ``size``-character substrings of a text file.

    The table never holds more than ``k`` entries. When it is full and an
    unseen substring arrives, every stored count is decremented by one and
    entries that reach zero are dropped; the new substring itself is not
    inserted. (This resembles the Misra-Gries frequent-items scheme.)
    Because of the decrements, the returned counts are lower bounds rather
    than exact totals.

    Args:
        file: Path of the text file to read (opened in text mode with the
            platform default encoding).
        size: Length of the sliding window, in characters.
        k: Maximum number of distinct substrings tracked at once.

    Returns:
        A dict mapping each surviving substring to its (approximate) count.
    """
    # Read the whole file in one call: the handle is closed deterministically
    # and we avoid building the text with repeated ``+=`` concatenation.
    with open(file) as handle:
        text = handle.read()

    counts = {}
    for word in window(text, size):
        if word in counts:
            counts[word] += 1
        elif len(counts) < k:
            counts[word] = 1
        else:
            # Table is full: age every entry by one and evict those whose
            # count was 1 (they would drop to zero).
            counts = {
                key: value - 1 for key, value in counts.items() if value > 1
            }
    return counts


def printTop(table, top):
    """Print the ``top`` highest-count entries of ``table``, one per line.

    Entries are listed in descending count order; newlines inside a key are
    shown as the two characters ``\\n`` so each entry stays on one line.
    """
    ranked = sorted(table, key=table.__getitem__, reverse=True)
    for rank, key in enumerate(ranked, start=1):
        if rank > top:
            break
        escaped_key = key.replace('\n', '\\n')
        print("{}: '{}' -> {}".format(rank, escaped_key, table[key]))


def main():
    """Time ``counter`` on the ``enwik8`` file and print the top 30 entries."""
    t0 = timer()
    res = counter("enwik8", WORD_SIZE, K)
    # NOTE: ``default_timer`` measures elapsed wall-clock time, even though
    # the label below says "CPU time"; the label is kept for output parity.
    print("CPU time [s] ", timer() - t0)
    printTop(res, 30)


if __name__ == "__main__":
    main()