Module Name:    src
Committed By:   christos
Date:           Thu Feb 25 21:28:40 UTC 2021

Modified Files:
        src/lib/libc/regex: engine.c regcomp.c regexec.c regfree.c utils.h

Log Message:
Add glue to compile out the locale (multibyte/wide-character) code when NLS
is not defined, in order to make the library smaller.


To generate a diff of this commit:
cvs rdiff -u -r1.27 -r1.28 src/lib/libc/regex/engine.c
cvs rdiff -u -r1.41 -r1.42 src/lib/libc/regex/regcomp.c
cvs rdiff -u -r1.24 -r1.25 src/lib/libc/regex/regexec.c
cvs rdiff -u -r1.17 -r1.18 src/lib/libc/regex/regfree.c
cvs rdiff -u -r1.7 -r1.8 src/lib/libc/regex/utils.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libc/regex/engine.c
diff -u src/lib/libc/regex/engine.c:1.27 src/lib/libc/regex/engine.c:1.28
--- src/lib/libc/regex/engine.c:1.27	Wed Feb 24 13:13:21 2021
+++ src/lib/libc/regex/engine.c	Thu Feb 25 16:28:40 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: engine.c,v 1.27 2021/02/24 18:13:21 christos Exp $ */
+/* $NetBSD: engine.c,v 1.28 2021/02/25 21:28:40 christos Exp $ */
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
@@ -41,7 +41,7 @@
 #ifdef __FBSDID
 __FBSDID("$FreeBSD: head/lib/libc/regex/engine.c 368358 2020-12-05 03:16:05Z kevans $");
 #endif
-__RCSID("$NetBSD: engine.c,v 1.27 2021/02/24 18:13:21 christos Exp $");
+__RCSID("$NetBSD: engine.c,v 1.28 2021/02/25 21:28:40 christos Exp $");
 
 #include <stdbool.h>
 
@@ -161,13 +161,14 @@ static const char *pchar(int ch);
 static const char *
 stepback(const char *start, const char *cur, int nchar)
 {
+#ifdef NLS
 	const char *ret;
 	size_t wc, mbc;
 	mbstate_t mbs;
 	size_t clen;
 
 	if (MB_CUR_MAX == 1)
-		return ((cur - nchar) > start ? cur - nchar : NULL);
+		goto out;
 
 	ret = cur;
 	for (wc = nchar; wc > 0; wc--) {
@@ -185,6 +186,10 @@ stepback(const char *start, const char *
 	}
 
 	return (ret);
+out:
+#endif
+	/* Single-byte locale, or built without NLS: step back by bytes. */
+	return (cur - nchar) > start ? cur - nchar : NULL;
 }
 
 /*

Index: src/lib/libc/regex/regcomp.c
diff -u src/lib/libc/regex/regcomp.c:1.41 src/lib/libc/regex/regcomp.c:1.42
--- src/lib/libc/regex/regcomp.c:1.41	Thu Feb 25 08:42:16 2021
+++ src/lib/libc/regex/regcomp.c	Thu Feb 25 16:28:40 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: regcomp.c,v 1.41 2021/02/25 13:42:16 christos Exp $	*/
+/*	$NetBSD: regcomp.c,v 1.42 2021/02/25 21:28:40 christos Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
@@ -47,12 +47,15 @@
 static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
 __FBSDID("$FreeBSD: head/lib/libc/regex/regcomp.c 368359 2020-12-05 03:18:48Z kevans $");
 #endif
-__RCSID("$NetBSD: regcomp.c,v 1.41 2021/02/25 13:42:16 christos Exp $");
+__RCSID("$NetBSD: regcomp.c,v 1.42 2021/02/25 21:28:40 christos Exp $");
 
 #define _OPENBSD_SOURCE
+
+#ifndef LIBHACK
 #define REGEX_GNU_EXTENSIONS
 
 #include "namespace.h"
+#endif
 #include <sys/types.h>
 #include <stdio.h>
 #include <string.h>
@@ -61,10 +64,8 @@ __RCSID("$NetBSD: regcomp.c,v 1.41 2021/
 #include <stdlib.h>
 #include <regex.h>
 #include <stdbool.h>
-#include <wchar.h>
-#include <wctype.h>
 
-#ifdef __weak_alias
+#if defined(__weak_alias) && !defined(LIBHACK)
 __weak_alias(regcomp,_regcomp)
 #endif
 
@@ -222,6 +223,46 @@ static char nuls[10];		/* place to point
 #define MIN(a,b)	((a)<(b)?(a):(b))
 #endif
 
+#ifndef NLS
+static const struct {
+	const char *name;
+	int (*func)(int);
+} wctypes[] = {
+#define ADD(x) { .name = # x, .func = is ## x }
+	ADD(alnum),
+	ADD(alpha),
+	ADD(blank),
+	ADD(cntrl),
+	ADD(digit),
+	ADD(graph),
+	ADD(lower),
+	ADD(print),
+	ADD(punct),
+	ADD(space),
+	ADD(upper),
+	ADD(xdigit),
+#undef ADD
+};
+
+wctype_t
+__regex_wctype(const char *str)
+{
+	for (size_t i = 0; i < __arraycount(wctypes); i++) {
+		if (strcmp(wctypes[i].name, str) == 0)
+			return (wctype_t)(i + 1);
+	}
+	return (wctype_t)0;
+}
+
+int
+__regex_iswctype(wint_t c, wctype_t ct)
+{
+	if (ct == 0)
+		return 0;
+	return (*wctypes[ct - 1].func)(c);
+}
+#endif
+
 static int				/* 0 success, otherwise REG_something */
 regcomp_internal(regex_t * __restrict preg,
 	const char * __restrict pattern,
@@ -1063,7 +1104,7 @@ p_range_cmp(wchar_t c1, wchar_t c2)
 {
 #ifdef REGEX_LIBC_COLLATE
 	return __wcollate_range_cmp(c1, c2);
-#else
+#elif defined(NLS)
 	/* Copied from libc/collate __wcollate_range_cmp */
 	wchar_t s1[2], s2[2];
 
@@ -1071,7 +1112,15 @@ p_range_cmp(wchar_t c1, wchar_t c2)
 	s1[1] = L'\0';
 	s2[0] = c2;
 	s2[1] = L'\0';
-	return (wcscoll(s1, s2));
+	return wcscoll(s1, s2);
+#else
+	char s1[2], s2[2];
+
+	s1[0] = (char)c1;
+	s1[1] = '\0';
+	s2[0] = (char)c2;
+	s2[1] = '\0';
+	return strcoll(s1, s2);
 #endif
 }
 
@@ -1219,6 +1268,7 @@ p_b_cclass(struct parse *p, cset *cs)
 
 	p_b_cclass_named(p, cs, clname);
 }
+
 /*
  - p_b_cclass_named - deal with a named character class
  == static void p_b_cclass_named(struct parse *p, cset *cs, const char []);
@@ -1283,9 +1333,7 @@ p_b_coll_elem(struct parse *p,
 {
 	const char *sp = p->next;
 	struct cname *cp;
-	mbstate_t mbs;
-	wchar_t wc;
-	size_t clen, len;
+	size_t len;
 
 	_DIAGASSERT(p != NULL);
 
@@ -1299,6 +1347,11 @@ p_b_coll_elem(struct parse *p,
 	for (cp = cnames; cp->name != NULL; cp++)
 		if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
 			return(cp->code);	/* known name */
+#ifdef NLS
+	mbstate_t mbs;
+	wchar_t wc;
+	size_t clen;
+
 	memset(&mbs, 0, sizeof(mbs));
 	if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
 		return (wc);			/* single character */
@@ -1307,6 +1360,12 @@ p_b_coll_elem(struct parse *p,
 	else
 		SETERROR(REG_ECOLLATE);		/* neither */
 	return(0);
+#else
+	if (len == 1)
+		return *sp;    /* single character */
+	SETERROR(REG_ECOLLATE);                 /* neither */
+	return 0;
+#endif
 }
 
 /*
@@ -1387,15 +1446,20 @@ bothcases(struct parse *p, wint_t ch)
 	const char *oldend = p->end;
 	char bracket[3 + MB_LEN_MAX];
 	size_t n;
-	mbstate_t mbs;
 
 	_DIAGASSERT(p != NULL);
 
 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
 	p->next = bracket;
+#ifdef NLS
+	mbstate_t mbs;
 	memset(&mbs, 0, sizeof(mbs));
 	n = wcrtomb(bracket, ch, &mbs);
 	assert(n != (size_t)-1);
+#else
+	n = 0;
+	bracket[n++] = ch;
+#endif
 	bracket[n] = ']';
 	bracket[n + 1] = '\0';
 	p->end = bracket+n+1;
@@ -1540,6 +1604,7 @@ repeat(struct parse *p,
 static wint_t
 wgetnext(struct parse *p)
 {
+#ifdef NLS
 	mbstate_t mbs;
 	wchar_t wc;
 	size_t n;
@@ -1553,7 +1618,10 @@ wgetnext(struct parse *p)
 	if (n == 0)
 		n = 1;
 	p->next += n;
-	return (wc);
+	return wc;
+#else
+	return *p->next++;
+#endif
 }
 
 /*
@@ -1898,8 +1966,6 @@ findmust(struct parse *p, struct re_guts
 	sop s;
 	char *cp;
 	int offset;
-	char buf[MB_LEN_MAX];
-	size_t clen;
 	mbstate_t mbs;
 
 	_DIAGASSERT(p != NULL);
@@ -1933,10 +1999,15 @@ findmust(struct parse *p, struct re_guts
 				memset(&mbs, 0, sizeof(mbs));
 				newstart = scan - 1;
 			}
-			clen = wcrtomb(buf, (int)OPND(s), &mbs);
+#ifdef NLS
+			char buf[MB_LEN_MAX];
+			size_t clen = wcrtomb(buf, (int)OPND(s), &mbs);
 			if (clen == (size_t)-1)
 				goto toohard;
 			newlen += (sopno)clen;
+#else
+			newlen++;
+#endif
 			break;
 		case OPLUS_:		/* things that don't break one */
 		case OLPAREN:
@@ -2017,7 +2088,9 @@ findmust(struct parse *p, struct re_guts
 				offset++;
 			newlen = 0;
 			break;
+#ifdef NLS
 		toohard:/*FALLTHROUGH*/
+#endif
 		default:
 			/* Anything here makes it impossible or too hard
 			 * to calculate the offset -- so we give up;
@@ -2056,9 +2129,13 @@ findmust(struct parse *p, struct re_guts
 	while (cp < g->must + g->mlen) {
 		while (OP(s = *scan++) != OCHAR)
 			continue;
+#ifdef NLS
-		clen = wcrtomb(cp, (int)OPND(s), &mbs);
+		size_t clen = wcrtomb(cp, (int)OPND(s), &mbs);
 		assert(clen != (size_t)-1);
 		cp += clen;
+#else
+		*cp++ = OPND(s);
+#endif
 	}
 	assert(cp == g->must + g->mlen);
 	*cp++ = '\0';		/* just on general principles */

Index: src/lib/libc/regex/regexec.c
diff -u src/lib/libc/regex/regexec.c:1.24 src/lib/libc/regex/regexec.c:1.25
--- src/lib/libc/regex/regexec.c:1.24	Wed Feb 24 13:13:21 2021
+++ src/lib/libc/regex/regexec.c	Thu Feb 25 16:28:40 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: regexec.c,v 1.24 2021/02/24 18:13:21 christos Exp $	*/
+/*	$NetBSD: regexec.c,v 1.25 2021/02/25 21:28:40 christos Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
@@ -42,7 +42,7 @@
 static char sccsid[] = "@(#)regexec.c	8.3 (Berkeley) 3/20/94";
 __FBSDID("$FreeBSD: head/lib/libc/regex/regexec.c 326025 2017-11-20 19:49:47Z pfg $");
 #endif
-__RCSID("$NetBSD: regexec.c,v 1.24 2021/02/24 18:13:21 christos Exp $");
+__RCSID("$NetBSD: regexec.c,v 1.25 2021/02/25 21:28:40 christos Exp $");
 
 /*
  * the outer shell of regexec()
@@ -52,7 +52,9 @@ __RCSID("$NetBSD: regexec.c,v 1.24 2021/
  * representations for state sets and characters.
  */
 
+#ifndef LIBHACK
 #include "namespace.h"
+#endif
 #include <sys/types.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -60,10 +62,8 @@ __RCSID("$NetBSD: regexec.c,v 1.24 2021/
 #include <limits.h>
 #include <ctype.h>
 #include <regex.h>
-#include <wchar.h>
-#include <wctype.h>
 
-#ifdef __weak_alias
+#if defined(__weak_alias) && !defined(LIBHACK)
 __weak_alias(regexec,_regexec)
 #endif
 
@@ -73,6 +73,7 @@ __weak_alias(regexec,_regexec)
 static __inline size_t
 xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
 {
+#ifdef NLS
 	size_t nr;
 	wchar_t wc;
 
@@ -88,6 +89,11 @@ xmbrtowc(wint_t *wi, const char *s, size
 		return (1);
 	} else
                 return (nr);
+#else
+	if (wi)
+		*wi = *s;
+	return 1;
+#endif
 }
 
 static __inline size_t

Index: src/lib/libc/regex/regfree.c
diff -u src/lib/libc/regex/regfree.c:1.17 src/lib/libc/regex/regfree.c:1.18
--- src/lib/libc/regex/regfree.c:1.17	Wed Feb 24 13:13:21 2021
+++ src/lib/libc/regex/regfree.c	Thu Feb 25 16:28:40 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: regfree.c,v 1.17 2021/02/24 18:13:21 christos Exp $	*/
+/*	$NetBSD: regfree.c,v 1.18 2021/02/25 21:28:40 christos Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
@@ -42,7 +42,7 @@
 static char sccsid[] = "@(#)regfree.c	8.3 (Berkeley) 3/20/94";
 __FBSDID("$FreeBSD: head/lib/libc/regex/regfree.c 326025 2017-11-20 19:49:47Z pfg $");
 #endif
-__RCSID("$NetBSD: regfree.c,v 1.17 2021/02/24 18:13:21 christos Exp $");
+__RCSID("$NetBSD: regfree.c,v 1.18 2021/02/25 21:28:40 christos Exp $");
 
 #include "namespace.h"
 #include <sys/types.h>
@@ -54,8 +54,6 @@ __RCSID("$NetBSD: regfree.c,v 1.17 2021/
 #ifdef __weak_alias
 __weak_alias(regfree,_regfree)
 #endif
-#include <wchar.h>
-#include <wctype.h>
 
 #include "utils.h"
 #include "regex2.h"

Index: src/lib/libc/regex/utils.h
diff -u src/lib/libc/regex/utils.h:1.7 src/lib/libc/regex/utils.h:1.8
--- src/lib/libc/regex/utils.h:1.7	Tue Feb 23 17:14:59 2021
+++ src/lib/libc/regex/utils.h	Thu Feb 25 16:28:40 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: utils.h,v 1.7 2021/02/23 22:14:59 christos Exp $	*/
+/*	$NetBSD: utils.h,v 1.8 2021/02/25 21:28:40 christos Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
@@ -38,6 +38,26 @@
  * $FreeBSD: head/lib/libc/regex/utils.h 341838 2018-12-12 04:23:00Z yuripv $
  */
 
+#ifdef NLS
+#include <wchar.h>
+#include <wctype.h>
+#else
+#include <ctype.h>
+typedef short wint_t;
+typedef char mbstate_t;
+typedef short wctype_t;
+#define iswupper(a) isupper(a)
+#define iswlower(a) islower(a)
+#define iswalpha(a) isalpha(a)
+#define iswalnum(a) isalnum(a)
+#define towupper(a) toupper(a)
+#define towlower(a) tolower(a)
+extern wctype_t __regex_wctype(const char *);
+extern int __regex_iswctype(wint_t, wctype_t);
+#define wctype(s) __regex_wctype(s)
+#define iswctype(c, t) __regex_iswctype((c), (t))
+#endif
+
 /* utility definitions */
 #define	DUPMAX		_POSIX2_RE_DUP_MAX	/* xxx is this right? */
 #define	INFINITY	(DUPMAX + 1)

Reply via email to