Author: Armin Rigo <ar...@tunes.org>
Branch: py3k
Changeset: r86794:adcb5fc61bbe
Date: 2016-08-31 22:57 +0200
http://bitbucket.org/pypy/pypy/changeset/adcb5fc61bbe/
Log: hg merge default

diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -365,7 +365,9 @@
         for op1, checkerfn in unroll_char_checker:
             if op1 == op:
                 return checkerfn(ctx, ptr, ppos)
-        raise Error("next_char_ok[%d]" % op)
+        # obscure case: it should be a single char pattern, but isn't
+        # one of the opcodes in unroll_char_checker (see test_ext_opcode)
+        return sre_match(ctx, ppos, ptr, self.start_marks) is not None

 class AbstractUntilMatchResult(MatchResult):

@@ -743,7 +745,8 @@
             minptr = start + ctx.pat(ppos+1)
             if minptr > ctx.end:
                 return # cannot match
-            ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2))
+            ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2),
+                                      marks)
             # when we arrive here, ptr points to the tail of the target
             # string. check if the rest of the pattern matches,
             # and backtrack if not.
@@ -765,7 +768,7 @@
                 if minptr > ctx.end:
                     return # cannot match
                 # count using pattern min as the maximum
-                ptr = find_repetition_end(ctx, ppos+3, ptr, min)
+                ptr = find_repetition_end(ctx, ppos+3, ptr, min, marks)
                 if ptr < minptr:
                     return # did not match minimum number of times

@@ -812,7 +815,7 @@
     return True

 @specializectx
-def find_repetition_end(ctx, ppos, ptr, maxcount):
+def find_repetition_end(ctx, ppos, ptr, maxcount, marks):
     end = ctx.end
     ptrp1 = ptr + 1
     # First get rid of the cases where we don't have room for any match.
@@ -827,8 +830,11 @@
         if op1 == op:
             if checkerfn(ctx, ptr, ppos):
                 break
+            return ptr
     else:
-        return ptr
+        # obscure case: it should be a single char pattern, but isn't
+        # one of the opcodes in unroll_char_checker (see test_ext_opcode)
+        return general_find_repetition_end(ctx, ppos, ptr, maxcount, marks)
     # It matches at least once. If maxcount == 1 (relatively common),
     # then we are done.
     if maxcount == 1:
@@ -846,6 +852,19 @@
     raise Error("rsre.find_repetition_end[%d]" % op)

 @specializectx
+def general_find_repetition_end(ctx, ppos, ptr, maxcount, marks):
+    # moved into its own JIT-opaque function
+    end = ctx.end
+    if maxcount != rsre_char.MAXREPEAT:
+        # adjust end
+        end1 = ptr + maxcount
+        if end1 <= end:
+            end = end1
+    while ptr < end and sre_match(ctx, ppos, ptr, marks) is not None:
+        ptr += 1
+    return ptr
+
+@specializectx
 def match_ANY(ctx, ptr, ppos): # dot wildcard.
     return not rsre_char.is_linebreak(ctx.str(ptr))
 def match_ANY_ALL(ctx, ptr, ppos):
diff --git a/rpython/rlib/rsre/test/test_ext_opcode.py b/rpython/rlib/rsre/test/test_ext_opcode.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rsre/test/test_ext_opcode.py
@@ -0,0 +1,26 @@
+"""
+Test for cases that cannot be produced using the Python 2.7 sre_compile
+module, but can be produced by other means (e.g. Python 3.5)
+"""
+
+from rpython.rlib.rsre import rsre_core
+from rpython.rlib.rsre.rsre_char import MAXREPEAT
+
+# import OPCODE_XX as XX
+for name, value in rsre_core.__dict__.items():
+    if name.startswith('OPCODE_') and isinstance(value, int):
+        globals()[name[7:]] = value
+
+
+def test_repeat_one_with_backref():
+    # Python 3.5 compiles "(.)\1*" using REPEAT_ONE instead of REPEAT:
+    # it's a valid optimization because \1 is always one character long
+    r = [MARK, 0, ANY, MARK, 1, REPEAT_ONE, 6, 0, MAXREPEAT,
+         GROUPREF, 0, SUCCESS, SUCCESS]
+    assert rsre_core.match(r, "aaa").match_end == 3
+
+def test_min_repeat_one_with_backref():
+    # Python 3.5 compiles "(.)\1*?b" using MIN_REPEAT_ONE
+    r = [MARK, 0, ANY, MARK, 1, MIN_REPEAT_ONE, 6, 0, MAXREPEAT,
+         GROUPREF, 0, SUCCESS, LITERAL, 98, SUCCESS]
+    assert rsre_core.match(r, "aaab").match_end == 4
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit