On Fri, Jan 09, 2009 at 09:33:45AM +0100, Paolo Bonzini wrote:
This was bug 9697 in glibc's bugzilla, and is fixed by the patch at git
diff 4c2a6f3d 37bdc055ce^ in git://sources.redhat.com/git/glibc.git
(gitweb interface at http://repo.or.cz/w/glibc.git).
Another regex bug (bug 697 in glibc's bugzilla), by the way, is fixed by
the patch at git diff 37bdc055^ 9d565658.
Attaching those two for posterity.
diff --git a/ChangeLog b/ChangeLog
index 182bd26..8829b44 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2009-01-04 Paolo Bonzini bonz...@gnu.org
+
+ [BZ 9697]
+ * posix/bug-regex17.c: Add testcases.
+ * posix/regcomp.c (re_compile_fastmap_iter): Rewrite COMPLEX_BRACKET
+ handling.
+
2009-01-05 Martin Schwidefsky schwidef...@de.ibm.com
* sysdeps/unix/sysv/linux/s390/bits/libc-vdso.h: New file.
diff --git a/posix/bug-regex17.c b/posix/bug-regex17.c
index b42f9b6..1c11a1d 100644
--- a/posix/bug-regex17.c
+++ b/posix/bug-regex17.c
@@ -1,5 +1,5 @@
-/* Turkish regular expression tests.
- Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* German regular expression tests.
+ Copyright (C) 2002, 2003, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek ja...@redhat.com, 2002.
@@ -33,10 +33,10 @@ struct
int flags, nmatch;
regmatch_t rm[5];
} tests[] = {
- /* \xc3\x84 LATIN CAPITAL LETTER A WITH DIAERESIS
- \xc3\x96 LATIN CAPITAL LETTER O WITH DIAERESIS
- \xc3\xa4 LATIN SMALL LETTER A WITH DIAERESIS
- \xc3\xb6 LATIN SMALL LETTER O WITH DIAERESIS */
+ /* U+00C4  \xc3\x84  LATIN CAPITAL LETTER A WITH DIAERESIS
+    U+00D6  \xc3\x96  LATIN CAPITAL LETTER O WITH DIAERESIS
+    U+00E4  \xc3\xa4  LATIN SMALL LETTER A WITH DIAERESIS
+    U+00F6  \xc3\xb6  LATIN SMALL LETTER O WITH DIAERESIS */
{ "\xc3\x84\xc3\x96*\xc3\xb6$", "aB\xc3\xa4\xc3\xb6\xc3\xb6\xc3\x96",
REG_ICASE, 2,
{ { 2, 10 }, { -1, -1 } } },
{ "[\xc3\x84x]\xc3\x96*\xc3\xb6$", "aB\xc3\x84\xc3\xb6\xc3\xb6\xc3\x96",
REG_ICASE, 2,
@@ -45,10 +45,22 @@ struct
{ { 2, 10 }, { -1, -1 } } },
{ "[^x]\xc3\x96*\xc3\xb6$", "aB\xc3\xa4\xc3\xb6\xc3\xb6\xc3\x96", REG_ICASE,
2,
{ { 2, 10 }, { -1, -1 } } },
+
+ /* Tests for bug 9697:
+    U+00DF  \xc3\x9f  LATIN SMALL LETTER SHARP S
+    U+02DA  \xcb\x9a  RING ABOVE
+    U+02E2  \xcb\xa2  MODIFIER LETTER SMALL S */
+ { "[a-z]|[^a-z]", "\xcb\xa2", REG_EXTENDED, 2,
+   { { 0, 2 }, { -1, -1 } } },
+ { "[a-z]", "\xc3\x9f", REG_EXTENDED, 2,
+   { { 0, 2 }, { -1, -1 } } },
+ { "[^a-z]", "\xcb\x9a", REG_EXTENDED, 2,
+   { { 0, 2 }, { -1, -1 } } },
};
-int
-main (void)
+
+static int
+do_test (void)
{
regex_t re;
regmatch_t rm[5];
@@ -93,3 +105,6 @@ main (void)
return ret;
}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/posix/regcomp.c b/posix/regcomp.c
index d5a0535..4843cfe 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1,5 +1,6 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002,2003,2004,2005,2006,2007 Free Software Foundation, Inc.
+ Copyright (C) 2002,2003,2004,2005,2006,2007,2009
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa is...@yamato.ibm.com.
@@ -350,47 +351,67 @@ re_compile_fastmap_iter (regex_t *bufp, const
re_dfastate_t *init_state,
#ifdef RE_ENABLE_I18N
else if (type == COMPLEX_BRACKET)
{
- int i;
re_charset_t *cset = dfa->nodes[node].opr.mbcset;
- if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
- || cset->nranges || cset->nchar_classes)
- {
+ int i;
+
# ifdef _LIBC
- if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+ /* See if we have to try all bytes which start multiple collation
+    elements.
+    e.g. In da_DK, we want to catch 'a' since "aa" is a valid
+    collation element, and don't catch 'b' since 'b' is
+    the only collation element which starts from 'b' (and
+    it is caught by SIMPLE_BRACKET). */
+ if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
+ && (cset->ncoll_syms || cset->nranges))
{
- /* In this case we want to catch the bytes which are
-the first byte of any collation elements.
-e.g. In da_DK, we want to catch 'a' since "aa"
- is a valid collation element, and don't catch
- 'b' since 'b' is the only collation element
- which starts from 'b'. */
const int32_t *table = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
for (i = 0; i < SBC_MAX; ++i)
if (table[i] < 0)