Mark Summerfield <m...@qtrac.eu> added the comment: Hi,
I've noticed 3 differences between the re and regex engines. I don't know if they are intended or not, but thought it best to mention them. (I used the issue2636-20090810#3.zip version.) Python 2.6.2 (r262:71600, Apr 20 2009, 09:25:38) [GCC 4.3.2 20081105 (Red Hat 4.3.2-7)] on linux2 IDLE 2.6.2 >>> import re, regex >>> ############################################################ 1 of 3 >>> re1= re.compile(r""" (?!<\w)(?P<name>[-\w]+)= (?P<quote>(?P<single>')|(?P<double>"))? (?P<value>(?(single)[^']+?|(?(double)[^"]+?|\S+))) (?(quote)(?P=quote)) """, re.VERBOSE) >>> re2= regex.compile(r""" (?!<\w)(?P<name>[-\w]+)= (?P<quote>(?P<single>')|(?P<double>"))? (?P<value>(?(single)[^']+?|(?(double)[^"]+?|\S+))) (?(quote)(?P=quote)) """, re.VERBOSE) >>> text = "<table border='1'>" >>> re1.findall(text) [('border', "'", "'", '', '1')] >>> re2.findall(text) [] >>> text = "<table border=1>" >>> re1.findall(text) [('border', '', '', '', '1>')] >>> re2.findall(text) [] >>> ############################################################ 2 of 3 >>> re1 = re.compile(r"""^[ \t]* (?P<parenthesis>\()? [- ]? (?P<area>\d{3}) (?(parenthesis)\)) [- ]? (?P<local_a>\d{3}) [- ]? (?P<local_b>\d{4}) [ \t]*$ """, re.VERBOSE) >>> re2 = regex.compile(r"""^[ \t]* (?P<parenthesis>\()? [- ]? (?P<area>\d{3}) (?(parenthesis)\)) [- ]? (?P<local_a>\d{3}) [- ]? (?P<local_b>\d{4}) [ \t]*$ """, re.VERBOSE) >>> data = ("179-829-2116", "(187) 160 0880", "(286)-771-3878", "(291) 835-9634", "353-896-0505", "(555) 555 5555", "(555) 555-5555", "(555)-555-5555", "555 555 5555", "555 555-5555", "555-555-5555", "601 805 3142", "(675) 372 3135", "810 329 7071", "(820) 951 3885", "942 818-5280", "(983)8792282") >>> for d in data: ans1 = re1.findall(d) ans2 = re2.findall(d) print "re=%s rx=%s %d" % (ans1, ans2, ans1 == ans2) re=[('', '179', '829', '2116')] rx=[('', '179', '829', '2116')] 1 re=[('(', '187', '160', '0880')] rx=[] 0 re=[('(', '286', '771', '3878')] rx=[('(', '286', '771', '3878')] 1 re=[('(', '291', '835', '9634')] rx=[] 0 re=[('', '353', '896', '0505')] rx=[('', '353', '896', '0505')] 1 re=[('(', '555', '555', '5555')] rx=[] 0 re=[('(', '555', '555', '5555')] rx=[] 0 re=[('(', '555', '555', '5555')] rx=[('(', '555', '555', '5555')] 1 re=[('', '555', '555', '5555')] rx=[] 0 re=[('', '555', '555', '5555')] rx=[] 0 re=[('', '555', '555', '5555')] rx=[('', '555', '555', '5555')] 1 re=[('', '601', '805', '3142')] rx=[] 0 re=[('(', '675', '372', '3135')] rx=[] 0 re=[('', '810', '329', '7071')] rx=[] 0 re=[('(', '820', '951', '3885')] rx=[] 0 re=[('', '942', '818', '5280')] rx=[] 0 re=[('(', '983', '879', '2282')] rx=[('(', '983', '879', '2282')] 1 >>> ############################################################ 3 of 3 >>> re1 = re.compile(r""" <img\s+[^>]*?src=(?:(?P<quote>["'])(?P<qimage>[^\1>]+?) (?P=quote)|(?P<uimage>[^"' >]+))[^>]*?>""", re.VERBOSE) >>> re2 = regex.compile(r""" <img\s+[^>]*?src=(?:(?P<quote>["'])(?P<qimage>[^\1>]+?) (?P=quote)|(?P<uimage>[^"' >]+))[^>]*?>""", re.VERBOSE) >>> data = """<body> <img src='a.png'> <img alt='picture' src="b.png"> <img alt="picture" src="Big C.png" other="xyx"> <img src=icon.png alt=icon> <img src="I'm here!.jpg" alt="aren't I?">""" >>> data = data.split("\n") >>> data = [x.strip() for x in data] >>> for d in data: ans1 = re1.findall(d) ans2 = re2.findall(d) print "re=%s rx=%s %d" % (ans1, ans2, ans1 == ans2) re=[("'", 'a.png', '')] rx=[("'", 'a.png', '')] 1 re=[('"', 'b.png', '')] rx=[('"', 'b.png', '')] 1 re=[('"', 'Big C.png', '')] rx=[('"', 'Big C.png', '')] 1 re=[('', '', 'icon.png')] rx=[('', '', 'icon.png alt=icon')] 0 re=[('"', "I'm here!.jpg", '')] rx=[('"', "I'm here!.jpg", '')] 1 I'm sorry I haven't had the time to try to minimize the examples, but I hope that at least they will prove helpful. Number 3 looks like a problem with non-greedy matching; I don't know about the others. ---------- _______________________________________ Python tracker <rep...@bugs.python.org> <http://bugs.python.org/issue2636> _______________________________________ _______________________________________________ Python-bugs-list mailing list Unsubscribe: http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com