Module Name:    src
Committed By:   tnozaki
Date:           Sat Nov 19 17:45:11 UTC 2011

Modified Files:
        src/dist/nvi/regex: engine.c regcomp.c regex2.h

Log Message:
reliability fix, merge libc/regex fix to avoid memory exhaust problem.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/dist/nvi/regex/engine.c \
    src/dist/nvi/regex/regcomp.c
cvs rdiff -u -r1.3 -r1.4 src/dist/nvi/regex/regex2.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/dist/nvi/regex/engine.c
diff -u src/dist/nvi/regex/engine.c:1.6 src/dist/nvi/regex/engine.c:1.7
--- src/dist/nvi/regex/engine.c:1.6	Sun Apr 12 14:47:51 2009
+++ src/dist/nvi/regex/engine.c	Sat Nov 19 17:45:11 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: engine.c,v 1.6 2009/04/12 14:47:51 tnozaki Exp $ */
+/*	$NetBSD: engine.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */
 
 /*-
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
@@ -168,8 +168,8 @@ int eflags;
 	/* prescreening; this does wonders for this rather slow code */
 	if (g->must != NULL) {
 		for (dp = start; dp < stop; dp++)
-			if (*dp == g->must[0] && stop - dp >= g->mlen &&
-				MEMCMP(dp, g->must, (size_t)g->mlen) == 0)
+			if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
+				MEMCMP(dp, g->must, g->mlen) == 0)
 				break;
 		if (dp == stop)		/* we didn't find g->must */
 			return(REG_NOMATCH);
Index: src/dist/nvi/regex/regcomp.c
diff -u src/dist/nvi/regex/regcomp.c:1.6 src/dist/nvi/regex/regcomp.c:1.7
--- src/dist/nvi/regex/regcomp.c:1.6	Mon Mar 21 14:53:03 2011
+++ src/dist/nvi/regex/regcomp.c	Sat Nov 19 17:45:11 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: regcomp.c,v 1.6 2011/03/21 14:53:03 tnozaki Exp $ */
+/*	$NetBSD: regcomp.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */
 
 /*-
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
@@ -82,11 +82,11 @@ extern "C" {
 #endif
 
 /* === regcomp.c === */
-static void p_ere __P((struct parse *p, int stop));
-static void p_ere_exp __P((struct parse *p));
+static void p_ere __P((struct parse *p, int stop, size_t reclimit));
+static void p_ere_exp __P((struct parse *p, size_t reclimit));
 static void p_str __P((struct parse *p));
-static void p_bre __P((struct parse *p, int end1, int end2));
-static int p_simp_re __P((struct parse *p, int starordinary));
+static void p_bre __P((struct parse *p, int end1, int end2, size_t reclimit));
+static int p_simp_re __P((struct parse *p, int starordinary, size_t reclimit));
 static int p_count __P((struct parse *p));
 static void p_bracket __P((struct parse *p));
 static void p_b_term __P((struct parse *p, cset *cs));
@@ -98,7 +98,7 @@ static char othercase __P((int ch));
 static void bothcases __P((struct parse *p, int ch));
 static void ordinary __P((struct parse *p, int ch));
 static void nonnewline __P((struct parse *p));
-static void repeat __P((struct parse *p, sopno start, int from, int to));
+static void repeat __P((struct parse *p, sopno start, int from, int to, size_t reclimit));
 static int seterr __P((struct parse *p, int e));
 static cset *allocset __P((struct parse *p));
 static void freeset __P((struct parse *p, cset *cs));
@@ -122,7 +122,7 @@ static sopno dupl __P((struct parse *p, 
 static void doemit __P((struct parse *p, sop op, size_t opnd));
 static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
 static void dofwd __P((struct parse *p, sopno pos, sop value));
-static void enlarge __P((struct parse *p, sopno size));
+static int enlarge __P((struct parse *p, sopno size));
 static void stripsnug __P((struct parse *p, struct re_guts *g));
 static void findmust __P((struct parse *p, struct re_guts *g));
 static sopno pluscount __P((struct parse *p, struct re_guts *g));
@@ -170,6 +170,13 @@ static int never = 0;		/* for use in ass
 #define	never	0		/* some <assert.h>s have bugs too */
 #endif
 
+#define	MEMLIMIT	0x8000000
+#define MEMSIZE(p) \
+	((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
+	(p)->ncsalloc * sizeof(cset) + \
+	(p)->ssize * sizeof(sop))
+#define	RECLIMIT	256
+
 /*
  - regcomp - interface for parser and compilation
  = extern int regcomp(regex_t *, const RCHAR_T *, int);
@@ -258,11 +265,11 @@ regcomp(regex_t *preg, const RCHAR_T *pa
 	EMIT(OEND, 0);
 	g->firststate = THERE();
 	if (cflags&REG_EXTENDED)
-		p_ere(p, OUT);
+		p_ere(p, OUT, 0);
 	else if (cflags&REG_NOSPEC)
 		p_str(p);
 	else
-		p_bre(p, OUT, OUT);
+		p_bre(p, OUT, OUT, 0);
 	EMIT(OEND, 0);
 	g->laststate = THERE();
 
@@ -289,10 +296,10 @@ regcomp(regex_t *preg, const RCHAR_T *pa
 
 /*
  - p_ere - ERE parser top level, concatenation and alternation
- == static void p_ere(register struct parse *p, int stop);
+ == static void p_ere(register struct parse *p, int stop, size_t reclimit);
  */
 static void
-p_ere(register struct parse *p, int stop)
+p_ere(register struct parse *p, int stop, size_t reclimit)
                          
          			/* character this ERE should end at */
 {
@@ -302,11 +309,16 @@ p_ere(register struct parse *p, int stop
 	register sopno conc;
 	register int first = 1;		/* is this the first alternative? */
 
+	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+		p->error = REG_ESPACE;
+		return;
+	}
+
 	for (;;) {
 		/* do a bunch of concatenated expressions */
 		conc = HERE();
 		while (MORE() && (c = PEEK()) != '|' && c != stop)
-			p_ere_exp(p);
+			p_ere_exp(p, reclimit);
 		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
 
 		if (!EAT('|'))
@@ -338,7 +350,7 @@ p_ere(register struct parse *p, int stop
  == static void p_ere_exp(register struct parse *p);
  */
 static void
-p_ere_exp(register struct parse *p)
+p_ere_exp(register struct parse *p, size_t reclimit)
 {
 	register char c;
 	register sopno pos;
@@ -360,7 +372,7 @@ p_ere_exp(register struct parse *p)
 			p->pbegin[subno] = HERE();
 		EMIT(OLPAREN, subno);
 		if (!SEE(')'))
-			p_ere(p, ')');
+			p_ere(p, ')', reclimit);
 		if (subno < NPAREN) {
 			p->pend[subno] = HERE();
 			assert(p->pend[subno] != 0);
@@ -462,7 +474,7 @@ p_ere_exp(register struct parse *p)
 				count2 = INFINITY;
 		} else		/* just a single number */
 			count2 = count;
-		repeat(p, pos, count, count2);
+		repeat(p, pos, count, count2, 0);
 		if (!EAT('}')) {	/* error heuristics */
 			while (MORE() && PEEK() != '}')
 				NEXT();
@@ -496,7 +508,7 @@ p_str(register struct parse *p)
 /*
  - p_bre - BRE parser top level, anchoring and concatenation
  == static void p_bre(register struct parse *p, register int end1, \
- ==	register int end2);
+ ==	register int end2, size_t reclimit);
  * Giving end1 as OUT essentially eliminates the end1/end2 check.
  *
  * This implementation is a bit of a kludge, in that a trailing $ is first
@@ -506,22 +518,29 @@ p_str(register struct parse *p)
  * The amount of lookahead needed to avoid this kludge is excessive.
  */
 static void
-p_bre(register struct parse *p, register int end1, register int end2)
+p_bre(register struct parse *p, register int end1, register int end2, size_t reclimit)
                          
                   		/* first terminating character */
                   		/* second terminating character */
 {
-	register sopno start = HERE();
+	register sopno start;
 	register int first = 1;			/* first subexpression? */
 	register int wasdollar = 0;
 
+	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+		p->error = REG_ESPACE;
+		return;
+	}
+
+	start = HERE();
+
 	if (EAT('^')) {
 		EMIT(OBOL, 0);
 		p->g->iflags |= USEBOL;
 		p->g->nbol++;
 	}
 	while (MORE() && !SEETWO(end1, end2)) {
-		wasdollar = p_simp_re(p, first);
+		wasdollar = p_simp_re(p, first, reclimit);
 		first = 0;
 	}
 	if (wasdollar) {	/* oops, that was a trailing anchor */
@@ -536,10 +555,10 @@ p_bre(register struct parse *p, register
 
 /*
  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
- == static int p_simp_re(register struct parse *p, int starordinary);
+ == static int p_simp_re(register struct parse *p, int starordinary, size_t reclimit);
  */
 static int			/* was the simple RE an unbackslashed $? */
-p_simp_re(register struct parse *p, int starordinary)
+p_simp_re(register struct parse *p, int starordinary, size_t reclimit)
                          
                  		/* is a leading * an ordinary character? */
 {
@@ -571,7 +590,7 @@ p_simp_re(register struct parse *p, int 
 			EMIT(OLPAREN, subno);
 			/* the MORE here is an error heuristic */
 			if (MORE() && !SEETWO('\\', ')'))
-				p_bre(p, '\\', ')');
+				p_bre(p, '\\', ')', reclimit);
 			if (subno < NPAREN) {
 				p->pend[subno] = HERE();
 				assert(p->pend[subno] != 0);
@@ -646,7 +665,7 @@ p_simp_re(register struct parse *p, int 
 				count2 = INFINITY;
 		} else		/* just a single number */
 			count2 = count;
-		repeat(p, pos, count, count2);
+		repeat(p, pos, count, count2, reclimit);
 		if (!EATTWO('\\', '}')) {	/* error heuristics */
 			while (MORE() && !SEETWO('\\', '}'))
 				NEXT();
@@ -688,11 +707,15 @@ p_count(register struct parse *p)
 static void
 p_bracket(register struct parse *p)
 {
-	register cset *cs = allocset(p);
+	register cset *cs;
 	register int invert = 0;
 	static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' };
 	static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' };
 
+	cs = allocset(p);
+	if (cs == NULL)
+		return;
+
 	/* Dept of Truly Sickening Special-Case Kludges */
 	if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) {
 		EMIT(OBOW, 0);
@@ -1005,25 +1028,29 @@ nonnewline(register struct parse *p)
 
 /*
  - repeat - generate code for a bounded repetition, recursively if needed
- == static void repeat(register struct parse *p, sopno start, int from, int to);
+ == static void repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit);
  */
 static void
-repeat(register struct parse *p, sopno start, int from, int to)
+repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit)
                          
             			/* operand from here to end of strip */
          			/* repeated from this number */
        				/* to this number of times (maybe INFINITY) */
 {
-	register sopno finish = HERE();
+	register sopno finish;
 #	define	N	2
 #	define	INF	3
 #	define	REP(f, t)	((f)*8 + (t))
 #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
 	register sopno copy;
 
-	if (p->error != 0)	/* head off possible runaway recursion */
+	if (reclimit++ > RECLIMIT) 
+		p->error = REG_ESPACE;
+	if (p->error)
 		return;
 
+	finish = HERE();
+
 	assert(from <= to);
 
 	switch (REP(MAP(from), MAP(to))) {
@@ -1035,7 +1062,7 @@ repeat(register struct parse *p, sopno s
 	case REP(0, INF):		/* as x{1,}? */
 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
 		INSERT(OCH_, start);		/* offset is wrong... */
-		repeat(p, start+1, 1, to);
+		repeat(p, start+1, 1, to, reclimit);
 		ASTERN(OOR1, start);
 		AHEAD(start);			/* ... fix it */
 		EMIT(OOR2, 0);
@@ -1055,7 +1082,7 @@ repeat(register struct parse *p, sopno s
 		ASTERN(O_CH, THERETHERE());
 		copy = dupl(p, start+1, finish+1);
 		assert(copy == finish+4);
-		repeat(p, copy, 1, to-1);
+		repeat(p, copy, 1, to-1, reclimit);
 		break;
 	case REP(1, INF):		/* as x+ */
 		INSERT(OPLUS_, start);
@@ -1063,11 +1090,11 @@ repeat(register struct parse *p, sopno s
 		break;
 	case REP(N, N):			/* as xx{m-1,n-1} */
 		copy = dupl(p, start, finish);
-		repeat(p, copy, from-1, to-1);
+		repeat(p, copy, from-1, to-1, reclimit);
 		break;
 	case REP(N, INF):		/* as xx{n-1,INF} */
 		copy = dupl(p, start, finish);
-		repeat(p, copy, from-1, to);
+		repeat(p, copy, from-1, to, reclimit);
 		break;
 	default:			/* "can't happen" */
 		SETERROR(REG_ASSERT);	/* just in case */
@@ -1108,6 +1135,8 @@ allocset(register struct parse *p)
 		nc = p->ncsalloc;
 		assert(nc % CHAR_BIT == 0);
 		nbytes = nc / CHAR_BIT * css;
+		if (MEMSIZE(p) > MEMLIMIT)
+			goto oomem;
 		if (p->g->sets == NULL)
 			p->g->sets = (cset *)malloc(nc * sizeof(cset));
 		else
@@ -1126,13 +1155,14 @@ allocset(register struct parse *p)
 			(void) memset((char *)p->g->setbits + (nbytes - css),
 								0, css);
 		else {
+oomem:
 			no = 0;
 			SETERROR(REG_ESPACE);
 			/* caller's responsibility not to do set ops */
+			return NULL;
 		}
 	}
 
-	assert(p->g->sets != NULL);	/* xxx */
 	cs = &p->g->sets[no];
 	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
 	cs->mask = 1 << ((no) % CHAR_BIT);
@@ -1419,7 +1449,8 @@ dupl(register struct parse *p, sopno sta
 	assert(finish >= start);
 	if (len == 0)
 		return(ret);
-	enlarge(p, p->ssize + len);	/* this many unexpected additions */
+	if (!enlarge(p, p->ssize + len))	/* this many unexpected additions */
+		return ret;
 	assert(p->ssize >= p->slen + len);
 	(void) memcpy((char *)(p->strip + p->slen),
 		(char *)(p->strip + start), (size_t)len*sizeof(sop));
@@ -1449,8 +1480,8 @@ doemit(register struct parse *p, sop op,
 
 	/* deal with undersized strip */
 	if (p->slen >= p->ssize)
-		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
-	assert(p->slen < p->ssize);
+		if (!enlarge(p, (p->ssize+1) / 2 * 3))	/* +50% */
+			return;
 
 	/* finally, it's all reduced to the easy case */
 	p->strip[p->slen] = op;
@@ -1516,30 +1547,35 @@ dofwd(register struct parse *p, register
 
 /*
  - enlarge - enlarge the strip
- == static void enlarge(register struct parse *p, sopno size);
+ == static int enlarge(register struct parse *p, sopno size);
  */
-static void
+static int
 enlarge(register struct parse *p, register sopno size)
 {
 	register sop *sp;
 	register RCHAR_T *dp;
+	sopno osize;
 
 	if (p->ssize >= size)
-		return;
+		return 1;
 
-	sp = (sop *)realloc(p->strip, size*sizeof(sop));
-	if (sp == NULL) {
-		SETERROR(REG_ESPACE);
-		return;
-	}
+	osize = p->ssize;
+	p->ssize = size;
+	if (MEMSIZE(p) > MEMLIMIT)
+		goto oomem;
+	sp = realloc(p->strip, p->ssize * sizeof(sop));
+	if (sp == NULL)
+		goto oomem;
 	p->strip = sp;
-	dp = (RCHAR_T *)realloc(p->stripdata, size*sizeof(RCHAR_T));
+	dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T));
 	if (dp == NULL) {
+oomem:
+		p->ssize = osize;
 		SETERROR(REG_ESPACE);
-		return;
+		return 0;
 	}
 	p->stripdata = dp;
-	p->ssize = size;
+	return 1;
 }
 
 /*

Index: src/dist/nvi/regex/regex2.h
diff -u src/dist/nvi/regex/regex2.h:1.3 src/dist/nvi/regex/regex2.h:1.4
--- src/dist/nvi/regex/regex2.h:1.3	Mon Mar 21 14:53:03 2011
+++ src/dist/nvi/regex/regex2.h	Sat Nov 19 17:45:11 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: regex2.h,v 1.3 2011/03/21 14:53:03 tnozaki Exp $ */
+/*	$NetBSD: regex2.h,v 1.4 2011/11/19 17:45:11 tnozaki Exp $ */
 
 /*-
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
@@ -78,7 +78,7 @@
  * immediately *preceding* "execution" of that operator.
  */
 typedef char sop;	/* strip operator */
-typedef int sopno;
+typedef size_t sopno;
 /* operators			   meaning	operand			*/
 /*						(back, fwd are offsets)	*/
 #define	OEND	(1)		/* endmarker	-			*/
@@ -140,8 +140,8 @@ struct re_guts {
 #		define	MAGIC2	((('R'^0200)<<8)|'E')
 	sop *strip;		/* malloced area for strip */
 	RCHAR_T *stripdata;	/* malloced area for stripdata */
-	int csetsize;		/* number of bits in a cset vector */
-	int ncsets;		/* number of csets in use */
+	size_t csetsize;	/* number of bits in a cset vector */
+	size_t ncsets;		/* number of csets in use */
 	cset *sets;		/* -> cset [ncsets] */
 	uch *setbits;		/* -> uch[csetsize][ncsets/CHAR_BIT] */
 	int cflags;		/* copy of regcomp() cflags argument */
@@ -152,14 +152,14 @@ struct re_guts {
 #		define	USEBOL	01	/* used ^ */
 #		define	USEEOL	02	/* used $ */
 #		define	BAD	04	/* something wrong */
-	int nbol;		/* number of ^ used */
-	int neol;		/* number of $ used */
+	size_t nbol;		/* number of ^ used */
+	size_t neol;		/* number of $ used */
 #if 0
-	int ncategories;	/* how many character categories */
+	size_t ncategories;	/* how many character categories */
 	cat_t *categories;	/* ->catspace[-CHAR_MIN] */
 #endif
 	RCHAR_T *must;		/* match must contain this string */
-	int mlen;		/* length of must */
+	size_t mlen;		/* length of must */
 	size_t nsub;		/* copy of re_nsub */
 	int backrefs;		/* does it use back references? */
 	sopno nplus;		/* how deep does it nest +s? */

Reply via email to