Serhiy Storchaka added the comment:
> The patch should be completed to optimize also other Unicode kinds.
I'm working on it.
Here are the benchmark scripts that I use. The first tests regular strings
(every n-th char is replaced); the second tests random strings (1/n of all
chars, randomly distributed, are replaced).
----------
Added file: http://bugs.python.org/file27544/replacebench.py
Added file: http://bugs.python.org/file27545/replacebench2.py
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue16061>
_______________________________________
import timeit, gc, sys
# Target length of each benchmarked string; bench() pads its test string
# up to this many characters.
size = 100000
# Number of timing runs per configuration; bench() reports the fastest one.
repeats = 10
# Number of text.replace() calls executed within each timing run.
numbers = 100
# Switch off automatic garbage collection; bench() calls gc.collect()
# explicitly before every measurement instead.
gc.disable()
def bench(a, b, c):
    """Time ``text.replace(a, b)`` on strings with a periodic pattern.

    For every period ``n`` in 1..10, 20, 50, 100, 1000 and 10000 a test
    string is built in which every n-th position holds *a* and all other
    positions hold *c*; it is then padded with *c* up to ``size``
    characters (the lengths work out exactly when *a* and *c* are single
    characters).  One line is printed per period: the best time per call
    in microseconds, a tab, then the period and the three arguments.

    Reads the module-level ``size``, ``repeats`` and ``numbers`` settings.
    """
    periods = list(range(1, 11)) + [20, 50, 100, 1000, 10000]
    for period in periods:
        # One repeating unit: (period - 1) filler chars, then the target.
        unit = c * (period - 1) + a
        text = unit * (size // period)
        # Fill whatever is left with the filler char.
        padding = c * (size - len(text))
        text = text + padding

        # Start every measurement from a freshly collected heap.
        gc.collect()
        setup = "a=%r; b=%r; text=%r" % (a, b, text)
        timer = timeit.Timer("text.replace(a, b)", setup)
        fastest = min(timer.repeat(repeats, numbers))

        # Each run executes the statement `numbers` times, so scale the
        # total seconds to microseconds per single call.
        report = (fastest * 1e6 / numbers, period, a, b, c)
        print('%.0f\t%d %a %a %a' % report)
        sys.stdout.flush()
# Run the benchmark three times with characters of increasing code point:
# ASCII, above U+00FF (but inside the BMP), and above U+FFFF.
for chars in (
    ('a', 'b', 'c'),
    ('\u010a', '\u010b', '\u010c'),
    ('\U0001000a', '\U0001000b', '\U0001000c'),
):
    bench(*chars)
import timeit, gc, random, sys
# Total number of characters in each benchmarked string (assuming bench()
# is given single-character arguments).
size = 100000
# Number of timing runs per configuration; bench() reports the fastest one.
repeats = 5
# Number of text.replace() calls executed within each timing run.
numbers = 100
# Switch off automatic garbage collection; bench() calls gc.collect()
# explicitly before every measurement instead.
gc.disable()
def bench(a, b, c):
    """Time ``text.replace(a, b)`` on strings with randomly placed targets.

    For every ``n`` in 1..10, 20, 50, 100, 1000 and 10000 a test string is
    built from ``size // n`` copies of *a* and ``size - size // n`` copies
    of *c*, shuffled with ``random.shuffle``.  One line is printed per
    ``n``: the best time per call in microseconds, a tab, then ``n`` and
    the three arguments.

    Reads the module-level ``size``, ``repeats`` and ``numbers`` settings.
    The shuffle is unseeded, so the exact strings differ between runs.
    """
    ratios = list(range(1, 11)) + [20, 50, 100, 1000, 10000]
    for ratio in ratios:
        # Lay out all target chars first, then all filler chars ...
        ordered = a * (size // ratio) + c * (size - size // ratio)
        chars = list(ordered)
        # ... and scatter them in place.
        random.shuffle(chars)
        text = ''.join(chars)

        # Start every measurement from a freshly collected heap.
        gc.collect()
        setup = "a=%r; b=%r; text=%r" % (a, b, text)
        timer = timeit.Timer("text.replace(a, b)", setup)
        fastest = min(timer.repeat(repeats, numbers))

        # Each run executes the statement `numbers` times, so scale the
        # total seconds to microseconds per single call.
        report = (fastest * 1e6 / numbers, ratio, a, b, c)
        print('%.0f\t%d %a %a %a' % report)
        sys.stdout.flush()
# Run the benchmark three times with characters of increasing code point:
# ASCII, above U+00FF (but inside the BMP), and above U+FFFF.
for chars in (
    ('a', 'b', 'c'),
    ('\u010a', '\u010b', '\u010c'),
    ('\U0001000a', '\U0001000b', '\U0001000c'),
):
    bench(*chars)
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com