I've had a report from a user that Plex runs about half
as fast in 2.5 as it did in 2.4. In particular, the
NFA-to-DFA conversion phase, which does a lot of
messing about with dicts representing mappings between
sets of states, is noticeably slower.
Does anyone in the Ministry for Making Python Blazingly
Fast happen to know of some change that might have
pessimised things in this area?
--
Greg
--- Begin Message ---
Hi,
I have been using Plex now for several years and really like it very much!
Recently I switched from python 2.4 to 2.5 and I noticed that the parser runs
significantly slower with 2.5. I hope you do not mind that I attach an example
script and two profiler logs which show the difference. The difference is almost
a factor of 2. Do you have an idea why that might happen and is there anything
one could do to improve the performance?
Regards, Christian
--
Christian Kristukat ::
Institut fuer Festkoerperphysik, TU Berlin ======
[EMAIL PROTECTED] ||||||
Tel. +49-30-20896371 --------
from Plex import *
from Plex.Traditional import re as regex
class ParseString:
    """Minimal file-like wrapper around a string.

    The first call to read() returns the whole string; every later call
    returns '' until reset() is called.
    """

    def __init__(self, parse_str):
        """Store *parse_str* as the text to hand out on the first read()."""
        self.parse_str = parse_str
        self.EOF = 0  # 0 = text not yet delivered, 1 = already delivered

    def read(self, size):
        """Return the whole stored string once, then '' on later calls.

        *size* is accepted for file-object compatibility but is ignored:
        the complete string is always returned in a single chunk.
        """
        if self.EOF:
            return ''
        self.EOF = 1
        return self.parse_str

    def reset(self):
        """Re-arm the object so the next read() returns the string again."""
        self.EOF = 0
class SymParser:
    """Tokenise an expression string with Plex and collect variable names.

    parse() builds a Plex Lexicon, scans the expression token by token and
    returns the dictionary of variable names that setvar() recorded.
    """

    def __init__(self, tok):
        """Prepare parser state for the expression string *tok*."""
        self.pstr = ParseString(tok)
        self.count = 0     # number of tokens consumed so far by parse()
        self.varlist = {}  # name -> ['ns', index assigned on first sight]
        self.dummy = []    # variable names in order of first appearance
        self.nvars = 0     # next index to hand out to a new variable
        self.varfunc = self.setvar

    def setvar(self, scanner, name):
        """Lexicon action for plain names; return the replacement text.

        *scanner* is the argument supplied by the caller of the action and
        is not used. The names 'caller', 'e' and 'pi' are returned
        unchanged. A name seen for the first time is registered in
        ``varlist``/``dummy`` and replaced by ``'a[<index>]'``. A name seen
        before is replaced by ``'a[<position in dummy + self.count>]'``.
        """
        if name in ('caller', 'e', 'pi'):
            return name
        if name in self.varlist:
            return 'a[%d]' % (self.dummy.index(name) + self.count)
        self.varlist[name] = ['ns', self.nvars]
        self.dummy.append(name)
        ret = 'a[%d]' % self.nvars
        self.nvars += 1
        return ret

    def parse(self):
        """Scan the stored expression and return the collected ``varlist``.

        A fresh Lexicon is built on every call. Tokens are read until the
        scanner yields a token whose first element is None; ``self.count``
        is incremented once per token read before that.
        """
        # Building blocks for the token patterns.
        letter = regex('[A-Za-z_]')
        digit = Range("09")
        dot = Str(".")
        rnumber = (Rep1(digit) + dot + Rep1(digit)) | Rep1(digit)
        expnumber = (Rep1(digit) + dot + Rep1(digit) + Str('e')
                     + (Any('-+') | Empty) + Rep1(digit))
        cnumber = ((Rep1(digit) + dot + Rep1(digit) + Str('j'))
                   | (Rep1(digit) + Str('j')))
        number = rnumber | cnumber | expnumber
        x = Str("x")
        name = Rep1(letter) | (Rep1(letter) + Rep1(digit) + Rep(letter))
        inst_member = (name | Str(")") | digit) + dot + name
        parname = Str(r"'") + name + Str(r"'")
        func = name + Str("(")
        op = Any("^+-/*(),")
        space = Any(" \t\n\r")

        # The rule order is kept exactly as in the original script.
        # NOTE(review): presumably earlier rules win ties between equally
        # long matches -- confirm against the Plex documentation.
        lex = Lexicon([
            (number, TEXT),
            (x, TEXT),
            (func, TEXT),
            (parname, TEXT),
            (inst_member, TEXT),
            (name, self.varfunc),
            (op, TEXT),
            (space, IGNORE),
            (AnyChar, IGNORE)
        ])

        # The concatenated token values are accumulated but not returned.
        parsed = ""
        scanner = Scanner(lex, self.pstr, "pparse")
        while True:
            tok = scanner.read()
            if tok[0] is None:
                break
            parsed += tok[0]
            self.count += 1
        return self.varlist
def sym():
    """Benchmark body: build and run a SymParser ten times.

    A new SymParser is created for the same expression on every iteration
    and parse() is called on it. The last parser instance is printed once
    after the loop.
    """
    for _ in range(10):
        a = SymParser('amp*exp(-(x-pos)**2/fwhm)')
        a.parse()
    # Parenthesised single-argument form prints the same under Python 2
    # and also parses under Python 3.
    print(a)
def prof_sym():
    """Profile sym() and print the statistics sorted by cumulative time.

    The raw profile data is written to a file named 'modelprof' in the
    current working directory and then loaded again for reporting.
    """
    import profile
    import pstats

    # profile.run() executes the statement string in the __main__
    # namespace, so sym must be defined at module level of the script.
    profile.run('sym()', 'modelprof')

    # Each of these Stats methods returns the Stats object, so the calls
    # can be chained in the same order as before.
    stats = pstats.Stats('modelprof')
    stats.strip_dirs().sort_stats('cumulative').print_stats()
# Run the profiled benchmark only when executed as a script.
if __name__ == '__main__':
    prof_sym()
<__main__.SymParser instance at 0xb7c2d34c>
Sat Jun 9 21:45:53 2007 modelprof
106631 function calls (104491 primitive calls) in 1.700 CPU seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 1.700 1.700 plex_test2.py:81(sym)
1 0.000 0.000 1.700 1.700 profile:0(sym())
1 0.000 0.000 1.700 1.700 <string>:1(?)
10 0.000 0.000 1.700 0.170 plex_test2.py:42(parse)
10 0.000 0.000 1.560 0.156 Lexicons.py:113(__init__)
10 0.190 0.019 1.260 0.126 DFA.py:13(nfa_to_dfa)
1350 0.070 0.000 0.310 0.000 DFA.py:100(old_to_new)
90 0.010 0.000 0.300 0.003 Lexicons.py:158(add_token_to_machine)
530/90 0.030 0.000 0.270 0.003 Regexps.py:362(build_machine)
590/100 0.020 0.000 0.240 0.002 Regexps.py:315(build_machine)
2600 0.090 0.000 0.220 0.000 DFA.py:50(set_epsilon_closure)
2800 0.170 0.000 0.220 0.000 Transitions.py:91(items)
1350 0.050 0.000 0.190 0.000 DFA.py:140(make_key)
290 0.020 0.000 0.180 0.001 Regexps.py:384(build_machine)
1340 0.100 0.000 0.150 0.000 Machines.py:180(add_transitions)
2600 0.070 0.000 0.150 0.000 Transitions.py:69(add_set)
2000 0.020 0.000 0.140 0.000 Machines.py:83(add_transition)
1390 0.140 0.000 0.140 0.000 :0(sort)
2000 0.080 0.000 0.120 0.000 Transitions.py:53(add)
2690 0.040 0.000 0.110 0.000 DFA.py:61(epsilon_closure)
830 0.000 0.000 0.090 0.000 Regexps.py:241(build_machine)
6800 0.060 0.000 0.080 0.000 Transitions.py:117(split)
6600 0.080 0.000 0.080 0.000 :0(get)
1740/560 0.030 0.000 0.070 0.000 DFA.py:74(add_to_epsilon_closure)
40 0.010 0.000 0.060 0.002 Regexps.py:456(Any)
1040 0.020 0.000 0.060 0.000 Machines.py:86(link_to)
11940 0.050 0.000 0.050 0.000 :0(len)
12050 0.050 0.000 0.050 0.000 :0(chr)
7850 0.050 0.000 0.050 0.000 :0(keys)
1050 0.020 0.000 0.050 0.000 Machines.py:33(new_state)
10 0.000 0.000 0.040 0.004 Traditional.py:54(parse_mod)
10 0.000 0.000 0.040 0.004 Traditional.py:67(parse_prim)
10 0.000 0.000 0.040 0.004 Traditional.py:15(re)
10 0.000 0.000 0.040 0.004 Traditional.py:88(parse_charset)
10 0.000 0.000 0.040 0.004 Traditional.py:47(parse_seq)
10 0.000 0.000 0.040 0.004 Traditional.py:36(parse_alt)
10 0.000 0.000 0.040 0.004 Traditional.py:30(parse_re)
110 0.000 0.000 0.030 0.000 Regexps.py:128(build_opt)
220 0.010 0.000 0.030 0.000 Regexps.py:236(__init__)
210 0.000 0.000 0.030 0.000 Regexps.py:88(CodeRange)
40 0.000 0.000 0.030 0.001 Regexps.py:78(CodeRanges)
1050 0.020 0.000 0.030 0.000 Machines.py:70(__init__)
1200 0.010 0.000 0.020 0.000 Transitions.py:148(get_special)
220 0.010 0.000 0.020 0.000 Regexps.py:52(uppercase_range)
160 0.000 0.000 0.020 0.000 Scanners.py:109(scan_a_token)
160 0.020 0.000 0.020 0.000 Scanners.py:148(run_machine_inlined)
160 0.000 0.000 0.020 0.000 Scanners.py:88(read)
2810 0.020 0.000 0.020 0.000 :0(items)
40 0.010 0.000 0.020 0.001 Regexps.py:30(chars_to_ranges)
3026 0.020 0.000 0.020 0.000 :0(update)
7590 0.020 0.000 0.020 0.000 :0(append)
90 0.000 0.000 0.020 0.000 Regexps.py:444(Str)
90 0.010 0.000 0.020 0.000 Regexps.py:436(Str1)
1980 0.010 0.000 0.010 0.000 :0(ord)
4080 0.010 0.000 0.010 0.000 :0(copy)
130 0.000 0.000 0.010 0.000 Regexps.py:378(__init__)
1740 0.010 0.000 0.010 0.000 Transitions.py:85(get_epsilon)
920 0.010 0.000 0.010 0.000 Regexps.py:150(check_re)
180 0.000 0.000 0.010 0.000 Regexps.py:138(__add__)
1360 0.010 0.000 0.010 0.000 Transitions.py:44(__init__)
280 0.010 0.000 0.010 0.000 Regexps.py:295(__init__)
150 0.010 0.000 0.010 0.000 Regexps.py:340(__init__)
80 0.000 0.000 0.010 0.000 Regexps.py:141(__or__)
150 0.010 0.000 0.010 0.000 :0(apply)
20 0.000 0.000 0.000 0.000 Regexps.py:262(build_machine)
10 0.000 0.000 0.000 0.000 Regexps.py:502(Opt)
10 0.000 0.000 0.000 0.000 Machines.py:192(get_initial_state)
10 0.000 0.000 0.000 0.000 :0(callable)
310 0.000 0.000 0.000 0.000 Machines.py:168(new_state)
30 0.000 0.000 0.000 0.000 :0(join)
310 0.000 0.000 0.000 0.000 DFA.py:119(highest_priority_action)
90 0.000 0.000 0.000 0.000 Regexps.py:213(Char)
1 0.000 0.000 0.000 0.000 :0(setprofile)
50/20 0.000 0.000 0.000 0.000 Regexps.py:144(__str__)
20 0.000 0.000 0.000 0.000 Traditional.py:130(lookahead)
10 0.000 0.000 0.000 0.000 Machines.py:47(make_initial_state)
440 0.000 0.000 0.000 0.000 :0(min)
10 0.000 0.000 0.000 0.000 Traditional.py:24(__init__)
10 0.000 0.000 0.000 0.000 Machines.py:24(__init__)
10 0.000 0.000 0.000 0.000 Machines.py:164(__del__)
10 0.000 0.000 0.000 0.000 Regexps.py:392(calc_str)
120 0.000 0.000 0.000 0.000 Actions.py:100(perform)
1050 0.000 0.000 0.000 0.000 Machines.py:77(destroy)
10 0.000 0.000 0.000 0.000 Machines.py:145(__init__)
30 0.000 0.000 0.000 0.000 Actions.py:46(perform)
10 0.000 0.000 0.000 0.000 plex_test2.py:20(__init__)
60 0.000 0.000 0.000 0.000 Traditional.py:123(get)
20 0.000 0.000 0.000 0.000 plex_test2.py:9(read)
10 0.000 0.000 0.000 0.000 Scanners.py:63(__init__)
10 0.000 0.000 0.000 0.000 Machines.py:177(make_initial_state)
100 0.000 0.000 0.000 0.000 Traditional.py:113(next)
10 0.000 0.000 0.000 0.000 Scanners.py:349(begin)
10 0.000 0.000 0.000 0.000 Traditional.py:138(expect)
90 0.000 0.000 0.000 0.000 Lexicons.py:178(parse_token_definition)
20 0.000 0.000 0.000 0.000 Regexps.py:332(calc_str)
11 0.000 0.000 0.000 0.000 :0(range)
230 0.000 0.000 0.000 0.000 :0(repr)
310 0.000 0.000 0.000 0.000 :0(clear)
110 0.000 0.000 0.000 0.000 :0(map)
10 0.000 0.000 0.000 0.000 DFA.py:95(__init__)
160 0.000 0.000 0.000 0.000 Scanners.py:355(produce)
10 0.000 0.000 0.000 0.000 Regexps.py:510(Rep)
1190 0.000 0.000 0.000 0.000 :0(isinstance)
90 0.000 0.000 0.000 0.000 Machines.py:90(set_action)
30 0.000 0.000 0.000 0.000 string.py:308(join)
220 0.000 0.000 0.000 0.000 Regexps.py:65(lowercase_range)
10 0.000 0.000 0.000 0.000 Machines.py:42(new_initial_state)
10 0.000 0.000 0.000 0.000 Actions.py:43(__init__)
310 0.000 0.000 0.000 0.000 DFA.py:136(new_to_old)
10 0.000 0.000 0.000 0.000 Lexicons.py:188(get_initial_state)
440 0.000 0.000 0.000 0.000 :0(max)
620 0.000 0.000 0.000 0.000 :0(id)
0 0.000 0.000 profile:0(profiler)
10 0.000 0.000 0.000 0.000 plex_test2.py:5(__init__)
10 0.000 0.000 0.000 0.000 Regexps.py:484(Range)
30 0.000 0.000 0.000 0.000 plex_test2.py:30(setvar)
10 0.000 0.000 0.000 0.000 Machines.py:28(__del__)
10 0.000 0.000 0.000 0.000 Scanners.py:370(eof)
<__main__.SymParser instance at 0xb7c29a8c>
Sat Jun 9 21:45:45 2007 modelprof
106602 function calls (104462 primitive calls) in 2.648 CPU seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 2.648 2.648 plex_test2.py:81(sym)
1 0.000 0.000 2.648 2.648 <string>:1(<module>)
1 0.000 0.000 2.648 2.648 profile:0(sym())
10 0.012 0.001 2.648 0.265 plex_test2.py:42(parse)
10 0.008 0.001 2.388 0.239 Lexicons.py:113(__init__)
10 0.144 0.014 1.868 0.187 DFA.py:13(nfa_to_dfa)
90 0.008 0.000 0.512 0.006 Lexicons.py:158(add_token_to_machine)
530/90 0.024 0.000 0.484 0.005 Regexps.py:362(build_machine)
1350 0.096 0.000 0.460 0.000 DFA.py:100(old_to_new)
590/100 0.076 0.000 0.436 0.004 Regexps.py:315(build_machine)
2600 0.140 0.000 0.412 0.000 Transitions.py:69(add_set)
2600 0.180 0.000 0.360 0.000 DFA.py:50(set_epsilon_closure)
6800 0.240 0.000 0.336 0.000 Transitions.py:117(split)
290 0.020 0.000 0.324 0.001 Regexps.py:384(build_machine)
2800 0.196 0.000 0.272 0.000 Transitions.py:91(items)
2000 0.064 0.000 0.268 0.000 Machines.py:83(add_transition)
1350 0.036 0.000 0.260 0.000 DFA.py:140(make_key)
1390 0.212 0.000 0.212 0.000 :0(sort)
830 0.036 0.000 0.208 0.000 Regexps.py:241(build_machine)
2000 0.072 0.000 0.204 0.000 Transitions.py:53(add)
1340 0.116 0.000 0.176 0.000 Machines.py:180(add_transitions)
2690 0.016 0.000 0.156 0.000 DFA.py:61(epsilon_closure)
1740/560 0.092 0.000 0.140 0.000 DFA.py:74(add_to_epsilon_closure)
6600 0.108 0.000 0.108 0.000 :0(get)
1040 0.016 0.000 0.092 0.000 Machines.py:86(link_to)
7850 0.076 0.000 0.076 0.000 :0(keys)
7590 0.072 0.000 0.072 0.000 :0(append)
4080 0.072 0.000 0.072 0.000 :0(copy)
12050 0.064 0.000 0.064 0.000 :0(chr)
160 0.000 0.000 0.060 0.000 Scanners.py:88(read)
11940 0.060 0.000 0.060 0.000 :0(len)
90 0.004 0.000 0.060 0.001 Regexps.py:444(Str)
40 0.004 0.000 0.052 0.001 Regexps.py:456(Any)
2997 0.052 0.000 0.052 0.000 :0(update)
90 0.004 0.000 0.052 0.001 Regexps.py:436(Str1)
160 0.004 0.000 0.044 0.000 Scanners.py:109(scan_a_token)
210 0.004 0.000 0.044 0.000 Regexps.py:88(CodeRange)
220 0.008 0.000 0.040 0.000 Regexps.py:236(__init__)
160 0.032 0.000 0.040 0.000 Scanners.py:148(run_machine_inlined)
110 0.012 0.000 0.040 0.000 :0(map)
10 0.000 0.000 0.036 0.004 Traditional.py:54(parse_mod)
10 0.004 0.000 0.036 0.004 Traditional.py:67(parse_prim)
10 0.000 0.000 0.036 0.004 Traditional.py:15(re)
10 0.000 0.000 0.036 0.004 Traditional.py:36(parse_alt)
10 0.000 0.000 0.036 0.004 Traditional.py:47(parse_seq)
10 0.000 0.000 0.036 0.004 Traditional.py:30(parse_re)
10 0.008 0.001 0.032 0.003 Traditional.py:88(parse_charset)
1050 0.012 0.000 0.032 0.000 Machines.py:33(new_state)
1740 0.024 0.000 0.028 0.000 Transitions.py:85(get_epsilon)
90 0.000 0.000 0.028 0.000 Regexps.py:213(Char)
40 0.008 0.000 0.024 0.001 Regexps.py:30(chars_to_ranges)
40 0.004 0.000 0.024 0.001 Regexps.py:78(CodeRanges)
110 0.008 0.000 0.024 0.000 Regexps.py:128(build_opt)
310 0.016 0.000 0.020 0.000 DFA.py:119(highest_priority_action)
280 0.008 0.000 0.020 0.000 Regexps.py:295(__init__)
220 0.012 0.000 0.020 0.000 Regexps.py:52(uppercase_range)
20 0.000 0.000 0.020 0.001 Regexps.py:262(build_machine)
1050 0.020 0.000 0.020 0.000 Machines.py:70(__init__)
2810 0.020 0.000 0.020 0.000 :0(items)
180 0.004 0.000 0.016 0.000 Regexps.py:138(__add__)
10 0.008 0.001 0.016 0.002 Machines.py:28(__del__)
150 0.008 0.000 0.016 0.000 Regexps.py:340(__init__)
90 0.016 0.000 0.016 0.000 Lexicons.py:178(parse_token_definition)
1200 0.012 0.000 0.016 0.000 Transitions.py:148(get_special)
1980 0.016 0.000 0.016 0.000 :0(ord)
160 0.004 0.000 0.012 0.000 Scanners.py:355(produce)
80 0.000 0.000 0.012 0.000 Regexps.py:141(__or__)
920 0.004 0.000 0.012 0.000 Regexps.py:150(check_re)
150 0.000 0.000 0.012 0.000 :0(apply)
220 0.008 0.000 0.012 0.000 Regexps.py:65(lowercase_range)
1050 0.008 0.000 0.008 0.000 Machines.py:77(destroy)
310 0.004 0.000 0.008 0.000 DFA.py:136(new_to_old)
310 0.004 0.000 0.008 0.000 Machines.py:168(new_state)
1190 0.008 0.000 0.008 0.000 :0(isinstance)
440 0.004 0.000 0.004 0.000 :0(min)
60 0.000 0.000 0.004 0.000 Traditional.py:123(get)
100 0.000 0.000 0.004 0.000 Traditional.py:113(next)
10 0.000 0.000 0.004 0.000 Scanners.py:349(begin)
620 0.004 0.000 0.004 0.000 :0(id)
10 0.000 0.000 0.004 0.000 Scanners.py:63(__init__)
10 0.004 0.000 0.004 0.000 Lexicons.py:188(get_initial_state)
10 0.000 0.000 0.004 0.000 Regexps.py:484(Range)
1360 0.004 0.000 0.004 0.000 Transitions.py:44(__init__)
30 0.000 0.000 0.004 0.000 Actions.py:46(perform)
30 0.004 0.000 0.004 0.000 plex_test2.py:30(setvar)
10 0.000 0.000 0.000 0.000 Regexps.py:502(Opt)
10 0.000 0.000 0.000 0.000 Machines.py:192(get_initial_state)
10 0.000 0.000 0.000 0.000 :0(callable)
30 0.000 0.000 0.000 0.000 :0(join)
1 0.000 0.000 0.000 0.000 :0(setprofile)
50/20 0.000 0.000 0.000 0.000 Regexps.py:144(__str__)
20 0.000 0.000 0.000 0.000 Traditional.py:130(lookahead)
10 0.000 0.000 0.000 0.000 Traditional.py:24(__init__)
10 0.000 0.000 0.000 0.000 Machines.py:24(__init__)
20 0.000 0.000 0.000 0.000 Regexps.py:332(calc_str)
10 0.000 0.000 0.000 0.000 Machines.py:164(__del__)
10 0.000 0.000 0.000 0.000 Regexps.py:392(calc_str)
120 0.000 0.000 0.000 0.000 Actions.py:100(perform)
10 0.000 0.000 0.000 0.000 Machines.py:145(__init__)
30 0.000 0.000 0.000 0.000 string.py:306(join)
10 0.000 0.000 0.000 0.000 Machines.py:47(make_initial_state)
10 0.000 0.000 0.000 0.000 plex_test2.py:20(__init__)
130 0.000 0.000 0.000 0.000 Regexps.py:378(__init__)
20 0.000 0.000 0.000 0.000 plex_test2.py:9(read)
10 0.000 0.000 0.000 0.000 Machines.py:177(make_initial_state)
0 0.000 0.000 profile:0(profiler)
10 0.000 0.000 0.000 0.000 Traditional.py:138(expect)
11 0.000 0.000 0.000 0.000 :0(range)
230 0.000 0.000 0.000 0.000 :0(repr)
10 0.000 0.000 0.000 0.000 DFA.py:95(__init__)
10 0.000 0.000 0.000 0.000 Regexps.py:510(Rep)
90 0.000 0.000 0.000 0.000 Machines.py:90(set_action)
10 0.000 0.000 0.000 0.000 Actions.py:43(__init__)
10 0.000 0.000 0.000 0.000 Machines.py:42(new_initial_state)
440 0.000 0.000 0.000 0.000 :0(max)
310 0.000 0.000 0.000 0.000 :0(clear)
10 0.000 0.000 0.000 0.000 plex_test2.py:5(__init__)
10 0.000 0.000 0.000 0.000 Scanners.py:370(eof)
--- End Message ---
_______________________________________________
Python-Dev mailing list
Python-Dev@python.org
http://mail.python.org/mailman/listinfo/python-dev
Unsubscribe:
http://mail.python.org/mailman/options/python-dev/archive%40mail-archive.com