Author: Philip Jenvey <[email protected]>
Branch: py3k
Changeset: r71493:947c69e3867c
Date: 2014-05-13 11:28 -0700
http://bitbucket.org/pypy/pypy/changeset/947c69e3867c/
Log: update the sre support code to CPython 3.2.5's
diff --git a/pypy/module/cpyext/test/_sre.c b/pypy/module/cpyext/test/_sre.c
--- a/pypy/module/cpyext/test/_sre.c
+++ b/pypy/module/cpyext/test/_sre.c
@@ -453,7 +453,7 @@
}
else {
/* <CHARSET> <bitmap> (32 bits per code word) */
- if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
+ if (ch < 256 && (set[ch >> 5] & (1u << (ch & 31))))
return ok;
set += 8;
}
@@ -492,7 +492,7 @@
block = -1;
set += 64;
if (block >=0 &&
- (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
+ (set[block*8 + ((ch & 255)>>5)] & (1u << (ch & 31))))
return ok;
set += count*8;
}
@@ -518,7 +518,7 @@
Py_ssize_t i;
/* adjust end */
- if (maxcount < end - ptr && maxcount != 65535)
+ if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
end = ptr + maxcount;
switch (pattern[0]) {
@@ -1133,7 +1133,7 @@
} else {
/* general case */
LASTMARK_SAVE();
- while ((Py_ssize_t)ctx->pattern[2] == 65535
+ while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
state->ptr = ctx->ptr;
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
@@ -1219,7 +1219,7 @@
}
if ((ctx->count < ctx->u.rep->pattern[2] ||
- ctx->u.rep->pattern[2] == 65535) &&
+ ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
state->ptr != ctx->u.rep->last_ptr) {
/* we may have enough matches, but if we can
match another item, do so */
@@ -1296,13 +1296,18 @@
LASTMARK_RESTORE();
- if (ctx->count >= ctx->u.rep->pattern[2]
- && ctx->u.rep->pattern[2] != 65535)
+ if ((ctx->count >= ctx->u.rep->pattern[2]
+ && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
+ state->ptr == ctx->u.rep->last_ptr)
RETURN_FAILURE;
ctx->u.rep->count = ctx->count;
+ /* zero-width match protection */
+ DATA_PUSH(&ctx->u.rep->last_ptr);
+ ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
ctx->u.rep->pattern+3);
+ DATA_POP(&ctx->u.rep->last_ptr);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
@@ -1630,7 +1635,7 @@
static PyObject *
sre_codesize(PyObject* self, PyObject *unused)
{
- return Py_BuildValue("l", sizeof(SRE_CODE));
+ return PyLong_FromSize_t(sizeof(SRE_CODE));
}
static PyObject *
@@ -2468,7 +2473,7 @@
return NULL;
if (subn)
- return Py_BuildValue("Ni", item, n);
+ return Py_BuildValue("Nn", item, n);
return item;
@@ -2560,35 +2565,35 @@
}
PyDoc_STRVAR(pattern_match_doc,
-"match(string[, pos[, endpos]]) --> match object or None.\n\
+"match(string[, pos[, endpos]]) -> match object or None.\n\n\
Matches zero or more characters at the beginning of the string");
PyDoc_STRVAR(pattern_search_doc,
-"search(string[, pos[, endpos]]) --> match object or None.\n\
+"search(string[, pos[, endpos]]) -> match object or None.\n\n\
Scan through string looking for a match, and return a corresponding\n\
- MatchObject instance. Return None if no position in the string matches.");
+ match object instance. Return None if no position in the string matches.");
PyDoc_STRVAR(pattern_split_doc,
-"split(string[, maxsplit = 0]) --> list.\n\
+"split(string[, maxsplit = 0]) -> list.\n\n\
Split string by the occurrences of pattern.");
PyDoc_STRVAR(pattern_findall_doc,
-"findall(string[, pos[, endpos]]) --> list.\n\
+"findall(string[, pos[, endpos]]) -> list.\n\n\
Return a list of all non-overlapping matches of pattern in string.");
PyDoc_STRVAR(pattern_finditer_doc,
-"finditer(string[, pos[, endpos]]) --> iterator.\n\
+"finditer(string[, pos[, endpos]]) -> iterator.\n\n\
Return an iterator over all non-overlapping matches for the \n\
RE pattern in string. For each match, the iterator returns a\n\
match object.");
PyDoc_STRVAR(pattern_sub_doc,
-"sub(repl, string[, count = 0]) --> newstring\n\
+"sub(repl, string[, count = 0]) -> newstring.\n\n\
Return the string obtained by replacing the leftmost non-overlapping\n\
occurrences of pattern in string by the replacement repl.");
PyDoc_STRVAR(pattern_subn_doc,
-"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
+"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\n\
Return the tuple (new_string, number_of_subs_made) found by replacing\n\
the leftmost non-overlapping occurrences of pattern with the\n\
replacement repl.");
@@ -2696,6 +2701,13 @@
for (i = 0; i < n; i++) {
PyObject *o = PyList_GET_ITEM(code, i);
unsigned long value = PyLong_AsUnsignedLong(o);
+ if (value == (unsigned long)-1 && PyErr_Occurred()) {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "regular expression code size limit exceeded");
+ }
+ break;
+ }
self->code[i] = (SRE_CODE) value;
if ((unsigned long) self->code[i] != value) {
PyErr_SetString(PyExc_OverflowError,
@@ -3066,10 +3078,8 @@
GET_ARG; max = arg;
if (min > max)
FAIL;
-#ifdef Py_UNICODE_WIDE
- if (max > 65535)
+ if (max > SRE_MAXREPEAT)
FAIL;
-#endif
if (!_validate_inner(code, code+skip-4, groups))
FAIL;
code += skip-4;
@@ -3087,10 +3097,8 @@
GET_ARG; max = arg;
if (min > max)
FAIL;
-#ifdef Py_UNICODE_WIDE
- if (max > 65535)
+ if (max > SRE_MAXREPEAT)
FAIL;
-#endif
if (!_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
@@ -3421,7 +3429,7 @@
}
/* mark is -1 if group is undefined */
- return Py_BuildValue("i", self->mark[index*2]);
+ return PyLong_FromSsize_t(self->mark[index*2]);
}
static PyObject*
@@ -3444,7 +3452,7 @@
}
/* mark is -1 if group is undefined */
- return Py_BuildValue("i", self->mark[index*2+1]);
+ return PyLong_FromSsize_t(self->mark[index*2+1]);
}
LOCAL(PyObject*)
@@ -3577,14 +3585,54 @@
#endif
}
+PyDoc_STRVAR(match_doc,
+"The result of re.match() and re.search().\n\
+Match objects always have a boolean value of True.");
+
+PyDoc_STRVAR(match_group_doc,
+"group([group1, ...]) -> str or tuple.\n\n\
+ Return subgroup(s) of the match by indices or names.\n\
+ For 0 returns the entire match.");
+
+PyDoc_STRVAR(match_start_doc,
+"start([group=0]) -> int.\n\n\
+ Return index of the start of the substring matched by group.");
+
+PyDoc_STRVAR(match_end_doc,
+"end([group=0]) -> int.\n\n\
+ Return index of the end of the substring matched by group.");
+
+PyDoc_STRVAR(match_span_doc,
+"span([group]) -> tuple.\n\n\
+ For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
+
+PyDoc_STRVAR(match_groups_doc,
+"groups([default=None]) -> tuple.\n\n\
+ Return a tuple containing all the subgroups of the match, from 1.\n\
+ The default argument is used for groups\n\
+ that did not participate in the match");
+
+PyDoc_STRVAR(match_groupdict_doc,
+"groupdict([default=None]) -> dict.\n\n\
+ Return a dictionary containing all the named subgroups of the match,\n\
+ keyed by the subgroup name. The default argument is used for groups\n\
+ that did not participate in the match");
+
+PyDoc_STRVAR(match_expand_doc,
+"expand(template) -> str.\n\n\
+ Return the string obtained by doing backslash substitution\n\
+ on the string template, as done by the sub() method.");
+
static PyMethodDef match_methods[] = {
- {"group", (PyCFunction) match_group, METH_VARARGS},
- {"start", (PyCFunction) match_start, METH_VARARGS},
- {"end", (PyCFunction) match_end, METH_VARARGS},
- {"span", (PyCFunction) match_span, METH_VARARGS},
- {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
- {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
- {"expand", (PyCFunction) match_expand, METH_O},
+ {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
+ {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
+ {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
+ {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
+ {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
+ match_groups_doc},
+ {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
+ match_groupdict_doc},
+ {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
{"__copy__", (PyCFunction) match_copy, METH_NOARGS},
{"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
{NULL, NULL}
@@ -3594,7 +3642,7 @@
match_lastindex_get(MatchObject *self)
{
if (self->lastindex >= 0)
- return Py_BuildValue("i", self->lastindex);
+ return PyLong_FromSsize_t(self->lastindex);
Py_INCREF(Py_None);
return Py_None;
}
@@ -3663,7 +3711,7 @@
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
- 0, /* tp_doc */
+ match_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
@@ -3937,6 +3985,12 @@
Py_DECREF(x);
}
+ x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
+ if (x) {
+ PyDict_SetItemString(d, "MAXREPEAT", x);
+ Py_DECREF(x);
+ }
+
x = PyUnicode_FromString(copyright);
if (x) {
PyDict_SetItemString(d, "copyright", x);
diff --git a/pypy/module/cpyext/test/sre.h b/pypy/module/cpyext/test/sre.h
--- a/pypy/module/cpyext/test/sre.h
+++ b/pypy/module/cpyext/test/sre.h
@@ -14,11 +14,12 @@
#include "sre_constants.h"
/* size of a code word (must be unsigned short or larger, and
- large enough to hold a Py_UNICODE character) */
-#ifdef Py_UNICODE_WIDE
+ large enough to hold a UCS4 character) */
#define SRE_CODE Py_UCS4
+#if SIZEOF_SIZE_T > 4
+# define SRE_MAXREPEAT (~(SRE_CODE)0)
#else
-#define SRE_CODE unsigned short
+# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
#endif
typedef struct {
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit