regex

Christos Zoulas Sun, 06 Nov 2011 08:02:16 -0800

Module Name:    src
Committed By:   christos
Date:           Sun Nov  6 16:02:08 UTC 2011


Modified Files:
        src/tests/lib/libc/regex: att.c

Log Message:
Add the nullsubexpression tests. We mostly fail or don't support them.
Two of the tests actually cause the regex library to enter infinite
recursion.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/tests/lib/libc/regex/att.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/tests/lib/libc/regex/att.c
diff -u src/tests/lib/libc/regex/att.c:1.2 src/tests/lib/libc/regex/att.c:1.3
--- src/tests/lib/libc/regex/att.c:1.2	Sun Nov  6 10:19:31 2011
+++ src/tests/lib/libc/regex/att.c	Sun Nov  6 11:02:08 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: att.c,v 1.2 2011/11/06 15:19:31 christos Exp $	*/
+/*	$NetBSD: att.c,v 1.3 2011/11/06 16:02:08 christos Exp $	*/
 
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: att.c,v 1.2 2011/11/06 15:19:31 christos Exp $");
+__RCSID("$NetBSD: att.c,v 1.3 2011/11/06 16:02:08 christos Exp $");
 
 #include <stdio.h>
 #include <regex.h>
@@ -60,34 +60,6 @@ ATF_TC_HEAD(regex_att, tc)
 static const char sep[] = "\r\n\t";
 static const char delim[3] = "\\\\\0";
 
-static const struct {
-	const char *n;
-	int v;
-	int ce;
-#define COMP 1
-#define EXEC 2
-} nv[] = {
-	{ "OK", 0, COMP|EXEC },
-#define _DO(a, b)	{ # a, REG_ ## a, b },
-	_DO(NOMATCH, EXEC)
-	_DO(BADPAT, COMP)
-	_DO(ECOLLATE, COMP)
-	_DO(ECTYPE, COMP)
-	_DO(EESCAPE, COMP)
-	_DO(ESUBREG, COMP)
-	_DO(EBRACK, COMP)
-	_DO(EPAREN, COMP)
-	_DO(EBRACE, COMP)
-	_DO(BADBR, COMP)
-	_DO(ERANGE, COMP)
-	_DO(ESPACE, EXEC)
-	_DO(BADRPT, COMP)
-	_DO(EMPTY, COMP)
-	_DO(ASSERT, COMP)
-	_DO(INVARG, COMP)
-	_DO(ENOSYS, COMP)
-#undef _DO
-};
 
 static void
 fail(const char *pattern, const char *input, size_t lineno) {
@@ -106,10 +78,43 @@ bug(const char *pattern, const char *inp
 		/*
 		 * The default libc implementation by Henry Spencer
 		 */
-		{ "a[-]?c", "ac" },		// basic.dat
-		{ "(a*)*", "a" },		// categorization.dat
-		{ "(aba|a*b)*", "ababa" },	// categorization.dat
-		{ "\\(a\\(b\\)*\\)*\\2", "abab" },// categorization.dat
+		{ "a[-]?c", "ac" },			// basic.dat
+		{ "(a*)*", "a" },			// categorization.dat
+		{ "(aba|a*b)*", "ababa" },		// categorization.dat
+		{ "\\(a\\(b\\)*\\)*\\2", "abab" },	// categorization.dat
+		{ "(a*)*", "aaaaaa" },			// nullsubexpression.dat
+		{ "(a*)*", "aaaaaax" },			// nullsubexpression.dat
+		{ "(a*)+", "a" },			// nullsubexpression.dat
+		{ "(a*)+", "aaaaaa" },			// nullsubexpression.dat
+		{ "(a*)+", "aaaaaax" },			// nullsubexpression.dat
+		{ "([a]*)*", "a" },			// nullsubexpression.dat
+		{ "([a]*)*", "aaaaaa" },		// nullsubexpression.dat
+		{ "([a]*)*", "aaaaaax" },		// nullsubexpression.dat
+		{ "([a]*)+", "a" },			// nullsubexpression.dat
+		{ "([a]*)+", "aaaaaa" },		// nullsubexpression.dat
+		{ "([a]*)+", "aaaaaax" },		// nullsubexpression.dat
+		{ "([^b]*)*", "a" },			// nullsubexpression.dat
+		{ "([^b]*)*", "aaaaaa" },		// nullsubexpression.dat
+		{ "([^b]*)*", "aaaaaab" },		// nullsubexpression.dat
+		{ "([ab]*)*", "a" },			// nullsubexpression.dat
+		{ "([ab]*)*", "aaaaaa" },		// nullsubexpression.dat
+		{ "([ab]*)*", "ababab" },		// nullsubexpression.dat
+		{ "([ab]*)*", "bababa" },		// nullsubexpression.dat
+		{ "([ab]*)*", "b" },			// nullsubexpression.dat
+		{ "([ab]*)*", "bbbbbb" },		// nullsubexpression.dat
+		{ "([ab]*)*", "aaaabcde" },		// nullsubexpression.dat
+		{ "([^a]*)*", "b" },			// nullsubexpression.dat
+		{ "([^a]*)*", "bbbbbb" },		// nullsubexpression.dat
+		{ "([^ab]*)*", "ccccxx" },		// nullsubexpression.dat
+		{ "\\(a*\\)*\\(x\\)", "ax" },		// nullsubexpression.dat
+		{ "\\(a*\\)*\\(x\\)", "axa" },		// nullsubexpression.dat
+		{ "\\(a*\\)*\\(x\\)\\(\\1\\)", "x" },	// nullsubexpression.dat
+/* crash! */	{ "\\(a*\\)*\\(x\\)\\(\\1\\)", "ax" },	// nullsubexpression.dat
+/* crash! */	{ "\\(a*\\)*\\(x\\)\\(\\1\\)\\(x\\)", "axxa" },	// ""
+		{ "(a*)*(x)",  "ax" },			// nullsubexpression.dat
+		{ "(a*)*(x)",  "axa" },			// nullsubexpression.dat
+		{ "(a*)+(x)",  "ax" },			// nullsubexpression.dat
+		{ "(a*)+(x)",  "axa" },			// nullsubexpression.dat
 #endif
 	};
 
@@ -123,6 +128,40 @@ bug(const char *pattern, const char *inp
 	return 0;
 }
 
+#ifdef REGEX_SPENCER
+#define HAVE_BRACES	1
+#define HAVE_MINIMAL	0
+#endif
+#ifndef HAVE_BRACES
+#define HAVE_BRACES	1
+#endif
+#ifndef HAVE_MINIMAL
+#define HAVE_MINIMAL	1
+#endif
+
+static int
+optional(const char *s)
+{
+	static const struct{
+		const char *n;
+		int v;
+	} nv[]= {
+		{ "[[<element>]] not supported", HAVE_BRACES },
+		{ "no *? +? mimimal match ops", HAVE_MINIMAL },
+	};
+
+	for (size_t i = 0; i < __arraycount(nv); i++)
+		if (strcmp(nv[i].n, s) == 0) {
+			if (nv[i].v)
+				return 0;
+			fprintf(stderr, "skipping unsupported [%s] tests\n", s);
+			return 1;
+		}
+
+	ATF_REQUIRE_MSG(0, "Unknown feature: %s", s);
+	return 0;
+}
+
 static int
 unsupported(const char *s)
 {
@@ -156,6 +195,34 @@ unsupported(const char *s)
 static void
 geterror(const char *s, int *comp, int *exec)
 {
+	static const struct {
+		const char *n;
+		int v;
+		int ce;
+	} nv[] = {
+#define COMP 1
+#define EXEC 2
+		{ "OK", 0, COMP|EXEC },
+#define _DO(a, b)	{ # a, REG_ ## a, b },
+		_DO(NOMATCH, EXEC)
+		_DO(BADPAT, COMP)
+		_DO(ECOLLATE, COMP)
+		_DO(ECTYPE, COMP)
+		_DO(EESCAPE, COMP)
+		_DO(ESUBREG, COMP)
+		_DO(EBRACK, COMP)
+		_DO(EPAREN, COMP)
+		_DO(EBRACE, COMP)
+		_DO(BADBR, COMP)
+		_DO(ERANGE, COMP)
+		_DO(ESPACE, EXEC)
+		_DO(BADRPT, COMP)
+		_DO(EMPTY, COMP)
+		_DO(ASSERT, COMP)
+		_DO(INVARG, COMP)
+		_DO(ENOSYS, COMP)
+#undef _DO
+	};
 	*comp = 0;
 	*exec = 0;
 	for (size_t i = 0; i < __arraycount(nv); i++)
@@ -244,8 +311,9 @@ checkmatches(const char *matches, size_t
 ATF_TC_BODY(regex_att, tc)
 {
 	regex_t re;
-	char *line;
+	char *line, *lastpattern = NULL;
 	size_t len, lineno = 0;
+	int skipping = 0;
 
 	for (; (line = fparseln(stdin, &len, &lineno, delim, 0))
 	    != NULL; free(line)) {
@@ -257,9 +325,19 @@ ATF_TC_BODY(regex_att, tc)
 		if ((name = strtok(line, sep)) == NULL)
 			continue;
 
-		if (*name == ';' || *name == '}' || strcmp(name, "NOTE") == 0)
+		/*
+		 * We check these early so that we skip the lines quickly
+		 * in order to do more strict testing on the other arguments
+		 * The same characters are also tested in the switch below
+		 */
+		if (*name == '}') {
+			skipping = 0;
+			continue;
+		}
+		if (skipping)
+			continue;
+		if (*name == ';' || strcmp(name, "NOTE") == 0)
 			continue;
-
 
 		ATF_REQUIRE_MSG((pattern = strtok(NULL, sep)) != NULL,
 			"Missing pattern at line %zu", lineno);
@@ -274,15 +352,29 @@ ATF_TC_BODY(regex_att, tc)
 
 		if (strcmp(input, "NULL") == 0)
 			*input = '\0';
+
+		if (strcmp(pattern, "SAME") == 0) {
+			ATF_REQUIRE(lastpattern != NULL);
+			pattern = lastpattern;
+		} else {
+			free(lastpattern);
+			ATF_REQUIRE((lastpattern = strdup(pattern)) != NULL);
+		}
+
 		ATF_REQUIRE_MSG((matches = strtok(NULL, sep)) != NULL,
-			"Missing matches at line %zu", lineno);
+		    "Missing matches at line %zu", lineno);
 
 		comment = strtok(NULL, sep);
 		switch (*name) {
 		case '{':	/* Begin optional implementation */
+			if (optional(comment)) {
+				skipping++;
+				continue;
+			}
 			name++;	/* We have it, so ignore */
 			break;
 		case '}':	/* End optional implementation */
+			skipping = 0;
 			continue;
 		case '?':	/* Optional */
 		case '|':	/* Alternative */
@@ -290,7 +382,7 @@ ATF_TC_BODY(regex_att, tc)
 				continue;
 			name++;	/* We have it, so ignore */
 			break;
-		case ';':	/* Bug */
+		case ';':	/* Skip */
 			continue;
 		default:
 			break;

CVS commit: src/tests/lib/libc/regex

Reply via email to