Module Name:    othersrc
Committed By:   agc
Date:           Tue Oct  5 01:23:39 UTC 2021

Modified Files:
        othersrc/external/bsd/agcre/dist: Makefile.bsd Makefile.in
            Makefile.lib.in Makefile.libtool.in agcre.h libagcre.3 main.c
        othersrc/external/bsd/agcre/dist/tests: 54.expected 62.expected
        othersrc/external/bsd/agcre/lib: Makefile shlib_version
Added Files:
        othersrc/external/bsd/agcre/dist: agcre.c
Removed Files:
        othersrc/external/bsd/agcre/dist: comp.c error.c exec.c free.c
            internal.h lex.c lex.h new.c set.c set.h unicode.c unicode.h

Log Message:
Update agcre to version 20211001

        20211001
        ========

        + move to a single source file, which can just be included as a single 
file
        (the library method can still be used for this as well - sometimes, 
it's just easier to
        drop in a single file to other projects)

        + add AGCRE_EMBED_CTAGS as a primary regular expresion type (same as 
basic regexps,
        but where a '*' occurs as a non-column 0 element of the pattern, it is 
assumed
        to be a standard character, as produced by ctags(1) output; basic 
regular expressions
        only accept '*' as a standard character as the first element of the 
expression).
        Yay.

        + make the version string in the program a real string, so it can be 
found using strings(1)

        + add an agcre_rev_regexec(3) function, which searches the text in an 
efficient, reverse
        manner, a la strrchr(3) or memrchr(3) fashion.

        + add agcre_regnsub(3) and agcre_regasub(3) functions

        + bump the shared lib version to 1.0

        + cut version 20211001


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 othersrc/external/bsd/agcre/dist/Makefile.bsd \
    othersrc/external/bsd/agcre/dist/Makefile.in \
    othersrc/external/bsd/agcre/dist/Makefile.lib.in \
    othersrc/external/bsd/agcre/dist/Makefile.libtool.in \
    othersrc/external/bsd/agcre/dist/agcre.h \
    othersrc/external/bsd/agcre/dist/main.c
cvs rdiff -u -r0 -r1.1 othersrc/external/bsd/agcre/dist/agcre.c
cvs rdiff -u -r1.2 -r0 othersrc/external/bsd/agcre/dist/comp.c \
    othersrc/external/bsd/agcre/dist/error.c
cvs rdiff -u -r1.3 -r0 othersrc/external/bsd/agcre/dist/exec.c
cvs rdiff -u -r1.1 -r0 othersrc/external/bsd/agcre/dist/free.c \
    othersrc/external/bsd/agcre/dist/internal.h \
    othersrc/external/bsd/agcre/dist/lex.c \
    othersrc/external/bsd/agcre/dist/lex.h \
    othersrc/external/bsd/agcre/dist/new.c \
    othersrc/external/bsd/agcre/dist/set.c \
    othersrc/external/bsd/agcre/dist/set.h \
    othersrc/external/bsd/agcre/dist/unicode.c \
    othersrc/external/bsd/agcre/dist/unicode.h
cvs rdiff -u -r1.2 -r1.3 othersrc/external/bsd/agcre/dist/libagcre.3
cvs rdiff -u -r1.2 -r1.3 othersrc/external/bsd/agcre/dist/tests/54.expected
cvs rdiff -u -r1.1 -r1.2 othersrc/external/bsd/agcre/dist/tests/62.expected
cvs rdiff -u -r1.1 -r1.2 othersrc/external/bsd/agcre/lib/Makefile \
    othersrc/external/bsd/agcre/lib/shlib_version

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: othersrc/external/bsd/agcre/dist/Makefile.bsd
diff -u othersrc/external/bsd/agcre/dist/Makefile.bsd:1.1 othersrc/external/bsd/agcre/dist/Makefile.bsd:1.2
--- othersrc/external/bsd/agcre/dist/Makefile.bsd:1.1	Wed Aug 16 23:38:13 2017
+++ othersrc/external/bsd/agcre/dist/Makefile.bsd	Tue Oct  5 01:23:39 2021
@@ -1,11 +1,5 @@
 PROG=	agcre
-SRCS+=	comp.c
-SRCS+=	error.c
-SRCS+=	exec.c
-SRCS+=	free.c
-SRCS+=	lex.c
-SRCS+=	set.c
-SRCS+=	unicode.c
+SRCS+=	agcre.c
 SRCS+=	main.c
 MAN=	libagcre.3
 WARNS=	5
Index: othersrc/external/bsd/agcre/dist/Makefile.in
diff -u othersrc/external/bsd/agcre/dist/Makefile.in:1.1 othersrc/external/bsd/agcre/dist/Makefile.in:1.2
--- othersrc/external/bsd/agcre/dist/Makefile.in:1.1	Wed Aug 16 23:38:13 2017
+++ othersrc/external/bsd/agcre/dist/Makefile.in	Tue Oct  5 01:23:39 2021
@@ -1,8 +1,8 @@
-# $NetBSD: Makefile.in,v 1.1 2017/08/16 23:38:13 agc Exp $
+# $NetBSD: Makefile.in,v 1.2 2021/10/05 01:23:39 agc Exp $
 
 PROG=	agcre
 
-OBJS=	comp.o error.o exec.o free.o lex.o new.o set.o unicode.o main.o
+OBJS=	agcre.o main.o
 
 PREFIX=@PREFIX@
 MANDIR=@MANDIR@
Index: othersrc/external/bsd/agcre/dist/Makefile.lib.in
diff -u othersrc/external/bsd/agcre/dist/Makefile.lib.in:1.1 othersrc/external/bsd/agcre/dist/Makefile.lib.in:1.2
--- othersrc/external/bsd/agcre/dist/Makefile.lib.in:1.1	Wed Aug 16 23:38:13 2017
+++ othersrc/external/bsd/agcre/dist/Makefile.lib.in	Tue Oct  5 01:23:39 2021
@@ -1,8 +1,8 @@
-# $NetBSD: Makefile.lib.in,v 1.1 2017/08/16 23:38:13 agc Exp $
+# $NetBSD: Makefile.lib.in,v 1.2 2021/10/05 01:23:39 agc Exp $
 
 LIB=	libagcre.a
 
-           OBJS=	comp.o error.o exec.o free.o lex.o new.o set.o unicode.o
+OBJS=	agcre.o
 
 PREFIX=@PREFIX@
 MANDIR=@MANDIR@
Index: othersrc/external/bsd/agcre/dist/Makefile.libtool.in
diff -u othersrc/external/bsd/agcre/dist/Makefile.libtool.in:1.1 othersrc/external/bsd/agcre/dist/Makefile.libtool.in:1.2
--- othersrc/external/bsd/agcre/dist/Makefile.libtool.in:1.1	Wed Aug 16 23:38:13 2017
+++ othersrc/external/bsd/agcre/dist/Makefile.libtool.in	Tue Oct  5 01:23:39 2021
@@ -1,8 +1,8 @@
-# $NetBSD: Makefile.libtool.in,v 1.1 2017/08/16 23:38:13 agc Exp $
+# $NetBSD: Makefile.libtool.in,v 1.2 2021/10/05 01:23:39 agc Exp $
 
 LIB=	libagcre.a
 
-OBJS=	comp.o error.o exec.o free.o lex.o new.o set.o unicode.o
+OBJS=	agcre.o
 
 PREFIX=@PREFIX@
 MANDIR=@MANDIR@
Index: othersrc/external/bsd/agcre/dist/agcre.h
diff -u othersrc/external/bsd/agcre/dist/agcre.h:1.1 othersrc/external/bsd/agcre/dist/agcre.h:1.2
--- othersrc/external/bsd/agcre/dist/agcre.h:1.1	Wed Aug 16 23:38:13 2017
+++ othersrc/external/bsd/agcre/dist/agcre.h	Tue Oct  5 01:23:39 2021
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013,2017 Alistair Crooks. All Rights reserved.
+ * Copyright (c) 2013,2017,2020,2021 Alistair Crooks. All Rights reserved.
  * Copyright (c) 2007-2009 Russ Cox, Google Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,7 +29,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #ifndef AGCRE_VERSION
-#define AGCRE_VERSION	20170801
+#define AGCRE_VERSION	20211001
 
 #include <inttypes.h>
 
@@ -52,12 +52,15 @@
 #define REG_UTF32BE		AGCRE_REG_UTF32BE
 #define REG_SUCCESS		AGCRE_REG_SUCCESS
 #define REG_FAILURE		AGCRE_REG_FAILURE
+#define REG_NOMATCH		AGCRE_REG_FAILURE
 #define REG_MAGIC		AGCRE_MAGIC
 #define REG_MAX_SUBEXPR		AGCRE_MAX_SUBEXPR
 #define REG_MAX_EXPR_LENGTH	AGCRE_MAX_EXPR_LENGTH
+#define REG_ANCHOR		AGCRE_REG_ANCHOR
+#define REG_CTAGS		AGCRE_REG_CTAGS
 #define regoff_t		agcre_regoff_t
 #define regmatch_t		agcre_regmatch_t
-#define regex_t			agcre_re_t
+#define regex_t			agcre_regex_t
 #define regcomp			agcre_regcomp
 #define regexec			agcre_regexec
 #define regfree			agcre_regfree
@@ -80,6 +83,8 @@
 #define AGCRE_REG_UTF16BE	0x00002000
 #define AGCRE_REG_UTF32LE	0x00004000
 #define AGCRE_REG_UTF32BE	0x00008000
+#define AGCRE_REG_ANCHOR	0x00010000
+#define AGCRE_REG_CTAGS		0x00020000
 
 #define AGCRE_REG_SUCCESS	0
 #define AGCRE_REG_FAILURE	1
@@ -99,12 +104,12 @@ typedef struct agcre_regmatch_t {
 } agcre_regmatch_t;
 
 /* a regexp structure */
-typedef struct agcre_re_t {
+typedef struct agcre_regex_t {
 	uint32_t	 re_magic;	/* magic number */
 	size_t		 re_nsub;	/* number of subexpressions */
 	const char	*re_endp;	/* pointer to end of expr */
 	void		*re_g;		/* internals */
-} agcre_re_t;
+} agcre_regex_t;
 
 #ifndef __BEGIN_DECLS
 #  if defined(__cplusplus)
@@ -118,13 +123,19 @@ typedef struct agcre_re_t {
 
 __BEGIN_DECLS
 
-agcre_re_t *agcre_new(void);
-int agcre_regcomp(agcre_re_t * /*re*/, const void * /*s*/, uint32_t /*flags*/);
-int agcre_regexec(agcre_re_t * /*re*/, const void * /*vs*/, size_t /*mc*/,
-		agcre_regmatch_t * /*m*/, uint32_t /*flags*/);
-void agcre_regfree(agcre_re_t * /*re*/);
-size_t agcre_regerror(int /*errcode*/, const agcre_re_t * /*agcre*/,
-		char * /*errbuf*/, size_t /*size*/);
+agcre_regex_t *agcre_new(void);
+int agcre_regcomp(agcre_regex_t */*re*/, const void */*s*/, uint32_t /*flags*/);
+int agcre_regexec(agcre_regex_t */*re*/, const void */*vs*/, size_t /*mc*/,
+		agcre_regmatch_t */*m*/, uint32_t /*flags*/);
+int agcre_rev_regexec(agcre_regex_t */*agcre*/, const void */*vs*/,
+		size_t /*matchc*/, agcre_regmatch_t */*m*/, uint32_t /*flags*/);
+void agcre_regfree(agcre_regex_t */*re*/);
+size_t agcre_regerror(int /*errcode*/, const agcre_regex_t */*agcre*/,
+		char */*errbuf*/, size_t /*size*/);
+ssize_t agcre_regnsub(char */*buf*/, size_t /*size*/, const char */*repl*/,
+		const agcre_regmatch_t */*rm*/, const char */*str*/);
+ssize_t agcre_regasub(char **/*buf*/, const char */*repl*/,
+		const agcre_regmatch_t */*rm*/, const char */*str*/);
 
 __END_DECLS
 
Index: othersrc/external/bsd/agcre/dist/main.c
diff -u othersrc/external/bsd/agcre/dist/main.c:1.1 othersrc/external/bsd/agcre/dist/main.c:1.2
--- othersrc/external/bsd/agcre/dist/main.c:1.1	Wed Aug 16 23:38:13 2017
+++ othersrc/external/bsd/agcre/dist/main.c	Tue Oct  5 01:23:39 2021
@@ -67,6 +67,10 @@ typedef struct file_t {
 	uint8_t		 binary;	/* file is a "binary" */
 } file_t;
 
+#define STRINGIFY(x)	#x
+
+#define AGCRE_VERSION_STRING(x)	STRINGIFY(x)
+
 static inline int
 isbinary(char *buf, size_t size)
 {
@@ -133,7 +137,7 @@ static char	to[2][10];
 
 /* display the match */
 static int
-display_match(agcre_re_t *re, agcre_regmatch_t *m, rtflags_t *rtflags, char *s,
+display_match(agcre_regex_t *re, agcre_regmatch_t *m, rtflags_t *rtflags, char *s,
 	agcre_regoff_t start, agcre_regoff_t len)
 {
 	size_t	i;
@@ -160,7 +164,7 @@ display_match(agcre_re_t *re, agcre_regm
 
 /* search a line in a file */
 static int
-doline(agcre_re_t *re, rtflags_t *rtflags, file_t *file, char *line, size_t len, agcre_regmatch_t *m)
+doline(agcre_regex_t *re, rtflags_t *rtflags, file_t *file, char *line, size_t len, agcre_regmatch_t *m)
 {
 	agcre_regoff_t	start;
 	int		flags;
@@ -203,7 +207,7 @@ doline(agcre_re_t *re, rtflags_t *rtflag
 
 /* search in a file stream */
 static int
-dofp(agcre_re_t *re, FILE *fp, rtflags_t *rtflags, const char *f)
+dofp(agcre_regex_t *re, FILE *fp, rtflags_t *rtflags, const char *f)
 {
 	agcre_regmatch_t	*m;
 	rtflags_t		 localflags;
@@ -258,7 +262,7 @@ dofp(agcre_re_t *re, FILE *fp, rtflags_t
 
 /* search in a file/dir */
 static int
-dofile(agcre_re_t *re, const char *f, rtflags_t *rtflags)
+dofile(agcre_regex_t *re, const char *f, rtflags_t *rtflags)
 {
 	struct dirent	 entry;
 	struct dirent	*d;
@@ -333,7 +337,7 @@ readexpression(const char *f)
 int
 main(int argc, char **argv)
 {
-	agcre_re_t	 re;
+	agcre_regex_t	 re;
 	rtflags_t	 rtflags;
 	uint32_t	 pend;
 	char		*expression;
@@ -351,7 +355,7 @@ main(int argc, char **argv)
 	file = expression = NULL;
 	c = 0;
 	pend = 0;
-	while ((i = getopt(argc, argv, "DOP:S:T:Ve:f:ilm:nrtv")) != -1) {
+	while ((i = getopt(argc, argv, "DOP:S:T:Vae:f:ilm:nrtv")) != -1) {
 		switch(i) {
 		case 'D':
 			flags |= AGCRE_REG_DUMP;
@@ -380,8 +384,11 @@ main(int argc, char **argv)
 			}
 			break;
 		case 'V':
-			printf("agcre %d\n", AGCRE_VERSION);
+			printf("agcre " AGCRE_VERSION_STRING(AGCRE_VERSION) "\n");
 			exit(EXIT_SUCCESS);
+		case 'a':
+			flags |= AGCRE_REG_ANCHOR;
+			break;
 		case 'e':
 			expression = optarg;
 			break;

Index: othersrc/external/bsd/agcre/dist/libagcre.3
diff -u othersrc/external/bsd/agcre/dist/libagcre.3:1.2 othersrc/external/bsd/agcre/dist/libagcre.3:1.3
--- othersrc/external/bsd/agcre/dist/libagcre.3:1.2	Thu Aug 17 11:03:21 2017
+++ othersrc/external/bsd/agcre/dist/libagcre.3	Tue Oct  5 01:23:39 2021
@@ -1,6 +1,6 @@
-.\" $NetBSD: libagcre.3,v 1.2 2017/08/17 11:03:21 wiz Exp $
+.\" $NetBSD: libagcre.3,v 1.3 2021/10/05 01:23:39 agc Exp $
 .\"
-.\" Copyright (c) 2017 Alistair Crooks <a...@netbsd.org>
+.\" Copyright (c) 2017,2020,2021 Alistair Crooks <a...@netbsd.org>
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
@@ -23,14 +23,14 @@
 .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd July 4, 2017
-.Dt LIBAGCRE 3
+.Dd October 1, 2021
+.Dt AGCRE-EMBED 3
 .Os
 .Sh NAME
-.Nm libagcre
+.Nm agcre
 .Nd regular expression library
 .Sh LIBRARY
-.Lb libagcre
+.Lb agcre
 .Sh SYNOPSIS
 .In agcre.h
 .Ft "agcre_regex_t *"
@@ -39,11 +39,15 @@
 .Fc
 .Ft "int"
 .Fo agcre_regcomp
-.Fa "agcre_regex_t *expression" "const void *pat" "int flags"
+.Fa "agcre_regex_t *expression" "const void *pat" "uint32_t flags"
 .Fc
 .Ft int
 .Fo agcre_regexec
-.Fa "agcre_regex_t *expression" "const void *input" "size_t nmatch" "agcre_regmatch_t *matches" "int flags"
+.Fa "agcre_regex_t *expression" "const void *input" "size_t nmatch" "agcre_regmatch_t *matches" "uint32_t flags"
+.Fc
+.Ft int
+.Fo agcre_rev_regexec
+.Fa "agcre_regex_t *expression" "const void *input" "size_t nmatch" "agcre_regmatch_t *matches" "uint32_t flags"
 .Fc
 .Ft void
 .Fo agcre_regfree
@@ -53,6 +57,14 @@
 .Fo agcre_regerror
 .Fa "int errnum" "const agcre_regex_t *expression" "char *buf" "size_t size"
 .Fc
+.Ft ssize_t
+.Fo agcre_regnsub
+.Fa "char *buf" "size_t size" "const char *repl" "const agcre_regmatch_t *matches" "const char *str"
+.Fc
+.Ft ssize_t
+.Fo agcre_regasub
+.Fa "char **buf" "const char *repl" "const agcre_regmatch_t *matches" "const char *str"
+.Fc
 .Sh DESCRIPTION
 The
 .Nm
@@ -119,10 +131,6 @@ expression, the individual virtual machi
 will be printed on standard output.
 .It Dv AGCRE_REG_TRACE
 Traces execution of the regular expression.
-.It Dv AGCRE_REG_LARGE
-Is deprecated, and only recognised for legacy reasons.
-.It Dv AGCRE_REG_BACKR
-Is deprecated, and only recognised for legacy reasons.
 .\".It Dv AGCRE_IN_8BITS
 .\"This indicates to the
 .\".Fn agcre_regcomp
@@ -186,6 +194,32 @@ and
 fields provided in the zero-th element of the
 .Fa matchv
 argument.
+.It Dv AGCRE_REG_ANCHOR
+Normally regular expressions are not assumed to start at the first character,
+but rather to have a lazy search at their start.
+Occasionally, a search anchored to the start of the
+string is preferred.
+This cannot be simulated by the
+.Dv AGCRE_REG_NOTBOL
+flag.
+Searches where a match needs to be found in the first character
+should therefore use the
+.Dv AGCRE_REG_ANCHOR
+definition.
+Anchoring can be specified at expression compile time,
+and/or at expression execution time.
+.It Dv AGCRE_REG_ANCHOR
+This is used where a regular expression pattern was generated by the
+.Xr ctags 1
+program, since the syntax generated does not conform to a standard 
+basic regular expression.
+In particular,
+.Xr ctags 1
+outputs function location patterns which contain an unescaped
+.Dq *
+character, whereas basic regular expressions will only
+recognise an asterisk as a standard character if it appears
+at the start of an expression.
 .\".It Dv AGCRE_IN_8BITS
 .\"This indicates to the
 .\".Fn agcre_regexec
@@ -208,6 +242,20 @@ argument.
 .\"integers.
 .El
 .Pp
+Since normal expression searches take place moving forwards within a byte stream,
+there is not normally any need to have a function which searches a byte stream or buffer
+moving in reverse.
+However, editors frequently must implement this functionality on their own.
+To make reverse searching easier, the
+.Fn agcre_rev_regexec
+function is provided, which will start at the end of the sized
+buffer, using the
+.Dv AGCRE_REG_STARTEND
+definition, and the search will continue character by character
+until either a match is found or the start of the buffer is reached.
+This search will be done in an efficient manner, so that buffer
+contents which have already been searched will not be searched again.
+.Pp
 The
 .Fn agcre_regcomp
 and

Index: othersrc/external/bsd/agcre/dist/tests/54.expected
diff -u othersrc/external/bsd/agcre/dist/tests/54.expected:1.2 othersrc/external/bsd/agcre/dist/tests/54.expected:1.3
--- othersrc/external/bsd/agcre/dist/tests/54.expected:1.2	Sun Dec  3 21:25:56 2017
+++ othersrc/external/bsd/agcre/dist/tests/54.expected	Tue Oct  5 01:23:39 2021
@@ -1,4 +1,6 @@
+/usr/include/dev/ic/nvmeio.h:136:	for (i = 0; i < __arraycount(identify->lbaf); i++)
+/usr/include/dev/ic/nvmeio.h:192:	for (i = 0; i < __arraycount(identify->psd); i++)
 /usr/include/rump/rumpuser_port.h:260:#ifndef __arraycount
 /usr/include/rump/rumpuser_port.h:261:#define __arraycount(_ar_) (sizeof(_ar_)/sizeof(_ar_[0]))
 /usr/include/sys/bitops.h:324:	for (__i = 0; __i < __arraycount(__v->_b); __i++)		\
-/usr/include/sys/cdefs.h:573:#define	__arraycount(__x)	(sizeof(__x) / sizeof(__x[0]))
+/usr/include/sys/cdefs.h:636:#define	__arraycount(__x)	(sizeof(__x) / sizeof(__x[0]))

Index: othersrc/external/bsd/agcre/dist/tests/62.expected
diff -u othersrc/external/bsd/agcre/dist/tests/62.expected:1.1 othersrc/external/bsd/agcre/dist/tests/62.expected:1.2
--- othersrc/external/bsd/agcre/dist/tests/62.expected:1.1	Wed Aug 16 23:38:13 2017
+++ othersrc/external/bsd/agcre/dist/tests/62.expected	Tue Oct  5 01:23:39 2021
@@ -1 +1 @@
-agcre 20170801
+agcre 20211001

Index: othersrc/external/bsd/agcre/lib/Makefile
diff -u othersrc/external/bsd/agcre/lib/Makefile:1.1 othersrc/external/bsd/agcre/lib/Makefile:1.2
--- othersrc/external/bsd/agcre/lib/Makefile:1.1	Wed Aug 16 23:38:13 2017
+++ othersrc/external/bsd/agcre/lib/Makefile	Tue Oct  5 01:23:39 2021
@@ -1,14 +1,7 @@
-# $NetBSD: Makefile,v 1.1 2017/08/16 23:38:13 agc Exp $
+# $NetBSD: Makefile,v 1.2 2021/10/05 01:23:39 agc Exp $
 
 LIB=		agcre
-SRCS+=		comp.c
-SRCS+=		error.c
-SRCS+=		exec.c
-SRCS+=		free.c
-SRCS+=		lex.c
-SRCS+=		new.c
-SRCS+=		set.c
-SRCS+=		unicode.c
+SRCS+=		agcre.c
 MAN=		libagcre.3 agcre_format.7
 MLINKS+=	libagcre.3 agcre_regcomp.3
 MLINKS+=	libagcre.3 agcre_regexec.3
Index: othersrc/external/bsd/agcre/lib/shlib_version
diff -u othersrc/external/bsd/agcre/lib/shlib_version:1.1 othersrc/external/bsd/agcre/lib/shlib_version:1.2
--- othersrc/external/bsd/agcre/lib/shlib_version:1.1	Wed Aug 16 23:38:13 2017
+++ othersrc/external/bsd/agcre/lib/shlib_version	Tue Oct  5 01:23:39 2021
@@ -1,2 +1,2 @@
-major=0
+major=1
 minor=0

Added files:

Index: othersrc/external/bsd/agcre/dist/agcre.c
diff -u /dev/null othersrc/external/bsd/agcre/dist/agcre.c:1.1
--- /dev/null	Tue Oct  5 01:23:39 2021
+++ othersrc/external/bsd/agcre/dist/agcre.c	Tue Oct  5 01:23:39 2021
@@ -0,0 +1,3065 @@
+/*-
+ * Copyright (c) 2013,2017 Alistair Crooks. All Rights reserved.
+ * All rights reserved.
+ *
+ * Parts of this are:
+ * Copyright (c) 2007-2009 Russ Cox, Google Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "agcre.h"
+
+/* callback struct */
+typedef struct cb_t {
+	int		(*func)(uint32_t);	/* function */
+	uint8_t		  negate;		/* take the complement of the result */
+} cb_t;
+
+/* sets are implemented as combinations of ranges, individual cases, and callbacks */
+typedef struct set_t {
+	uint32_t	 rangec;		/* # of ranges */
+	uint32_t	 maxrange;		/* size of range array */
+	uint32_t	*ranges;		/* range array */
+	uint32_t	 casec;			/* # of cases */
+	uint32_t	 maxcase;		/* size of case array */
+	uint32_t	*cases;			/* case array */
+	uint32_t	 callbackc;		/* # of callbacks */
+	uint32_t	 maxcallback;		/* size of callback array */
+	cb_t		*callbacks;		/* callback array */
+	uint8_t		 complement;		/* complement of set */
+} set_t;
+
+#define AT_LEAST	0xffffffff
+#define BOF		0xffffffffU
+
+/* a regexp token */
+typedef struct retoken_t {
+	struct retoken_t	*left;		/* token on left */
+	struct retoken_t	*right;		/* token on right */
+	uint64_t	 	 so;		/* start offset */
+	uint32_t		 n;		/* number */
+	uint32_t	 	 nparens;	/* () count */
+	uint32_t	 	 lo;		/* repeat low count */
+	uint32_t	 	 hi;		/* repeat high count */
+	uint32_t	 	 ch;		/* current char */
+	uint8_t			 type;		/* type of token */
+	uint8_t			 lazy;		/* use lazy matching */
+	uint8_t		 	 noncapture;	/* non-capture subexpression */
+	uint8_t		 	 icase;		/* icase token */
+} retoken_t;
+
+/* token types */
+#define TOK_ALT			 0x01
+#define TOK_BACKREF		 0x02
+#define TOK_BROKEN		 0x03
+#define TOK_CAT			 0x04
+#define TOK_COLLECTION		 0x05
+#define TOK_DOT			 0x06
+#define TOK_EOL			 0x07
+#define TOK_FORGET		 0x08
+#define TOK_LIT			 0x09
+#define TOK_PAREN		 0x0a
+#define TOK_PLUS		 0x0b
+#define TOK_QUEST		 0x0c
+#define TOK_REP			 0x0d
+#define TOK_SET			 TOK_COLLECTION
+#define TOK_STAR		 0x0e
+#define TOK_WIDTH		 0x0f
+#define TOK_ZEROWIDTH		 0x10
+
+/* input structure */
+typedef struct input_t {
+	const char	*s;		/* string */
+	uint64_t	 so;		/* start offset */
+	uint64_t	 eo;		/* end offset */
+	uint64_t	 c;		/* current offset */
+	int		 msgc;		/* # of chars in message buf */
+	char		 msg[256];	/* message buffer */
+	retoken_t	 parse;		/* current token when parsing */
+	uint32_t	 parenc;	/* # of parentheses */
+	struct re_t	*re;		/* pointer to internal re */
+	uint32_t	 ch;		/* current char */
+	uint32_t	 prevch;	/* previous char */
+	uint8_t		 bytes;		/* size of current char */
+	uint8_t		 prevbytes;	/* size of previous char */
+	uint8_t		 icase;		/* persistent icase */
+} input_t;
+
+#define AGCRE_IN_WIDTH	\
+	(AGCRE_REG_UTF16LE | AGCRE_REG_UTF16BE | AGCRE_REG_UTF32LE | AGCRE_REG_UTF32BE)
+
+/* virtual machine instruction */
+typedef struct instr_t {
+	struct instr_t	*lhs;	/* lhs */
+	struct instr_t	*rhs;	/* rhs */
+	uint32_t	 n;	/* character */
+	uint32_t	 gen;	/* generation number */
+	uint8_t		 op;	/* instruction number */
+	uint8_t		 icase;	/* instruction number */
+} instr_t;
+
+/* instruction opcodes */
+#define OP_ANY		0x01
+#define OP_BACKREF	0x02
+#define OP_CHAR		0x03
+#define OP_JUMP		0x04
+#define OP_MATCH	0x05
+#define OP_SAVE		0x06
+#define OP_SET		0x07
+#define OP_SPLIT	0x08
+#define OP_ZEROWIDTH	0x09
+
+/* a context structure */
+typedef struct context_t {
+	struct context_t	*next;		/* for freeing nodes */
+	uint32_t 		 ref;		/* reference count */
+	uint32_t 		 nsub;		/* # matches */
+	uint32_t 		 c;		/* chars in backref */
+	agcre_regmatch_t	 sub[1];	/* match structures */
+} context_t;
+
+/* a thread structure */
+typedef struct re_thread_t {
+	instr_t		*pc;			/* instruction */
+	context_t	*ctx;			/* context */
+} re_thread_t;
+
+/* list of threads */
+typedef struct threadlist_t {
+	uint32_t	nthreads;		/* # of threads */
+	re_thread_t	t[1];			/* the threads */
+} threadlist_t;
+
+/* regular expression internals */
+typedef struct re_t {
+	instr_t		*prog;		/* start of instructions */
+	uint32_t	 instrc;	/* # of instructions */
+	uint32_t	 gen;		/* generation number */
+	uint32_t	 setc;		/* # of sets */
+	uint32_t	 maxset;	/* allocated # of sets */
+	set_t		*sets;		/* sets */
+	uint32_t	 flags;		/* comp/exec flags */
+	context_t	*ctxlist;	/* list of contexts */
+	instr_t		*pc;		/* prog counter */
+	int		 msgc;		/* # of chars in msg buffer */
+	char		 msg[256];	/* message buffer */
+} re_t;
+
+/*
+ * regular expressions have an implicit .* at the start to find the
+ * first match. for anchored searches (not that many in comparison
+ * to regular regular expressions) we need to jump over those
+ * instructions (in the regexec phase).
+ * Dump of compiled expression instructions for the pattern: "i"
+ * (without quotes)
+ *
+ * Cat(LazyStar(Dot), Paren(0, Lit(i)))
+ *  0. split 3, 1
+ *  1. any
+ *  2. jmp 0
+ *  3. save 0
+ *  4. char i
+ *  5. save 1
+ *  6. match
+ *
+ * so for anchored searches, we jump over the first 3 instructions
+ */
+#define LEADING_DOT_STAR_INSTRS		3
+
+/* all static function forward declarations */
+__BEGIN_DECLS
+
+static void set_free(set_t */*set*/);
+static int set_add_case(set_t */*set*/, uint32_t /*n*/);
+static int set_add_range(set_t */*set*/, uint32_t /*lo*/, uint32_t /*hi*/);
+static int set_add_callback(set_t */*set*/, int (*/*func*/)(uint32_t), int /*negate*/);
+static int set_isset(set_t */*set*/, uint32_t /*n*/);
+static int set_complement(set_t */*set*/);
+
+static retoken_t *parse_extended(agcre_regex_t */*re*/, input_t */*in*/);
+static retoken_t *parse_nospec(agcre_regex_t */*re*/, input_t */*in*/);
+static retoken_t *parse_basic(agcre_regex_t */*re*/, input_t */*in*/, uint32_t /*flags*/);
+
+static int get_next_token(input_t */*in*/);
+
+static int unicode_isWhite_Space(uint32_t /*ch*/);
+static int unicode_isDash(uint32_t /*ch*/);
+static int unicode_isHyphen(uint32_t /*ch*/);
+static int unicode_isQuotation_Mark(uint32_t /*ch*/);
+static int unicode_isTerminal_Punctuation(uint32_t /*ch*/);
+static int unicode_isHex_Digit(uint32_t /*ch*/);
+static int unicode_isOther_Alphabetic(uint32_t /*ch*/);
+static int unicode_isOther_Lowercase(uint32_t /*ch*/);
+static int unicode_isOther_Uppercase(uint32_t /*ch*/);
+static int unicode_isPattern_White_Space(uint32_t /*ch*/);
+static int unicode_isalnum(uint32_t /*ch*/);
+static int unicode_isalpha(uint32_t /*ch*/);
+static int unicode_isblank(uint32_t /*ch*/);
+static int unicode_iscntrl(uint32_t /*ch*/);
+static int unicode_isdigit(uint32_t /*ch*/);
+static int unicode_isgraph(uint32_t /*ch*/);
+static int unicode_islower(uint32_t /*ch*/);
+static int unicode_isprint(uint32_t /*ch*/);
+static int unicode_ispunct(uint32_t /*ch*/);
+static int unicode_isspace(uint32_t /*ch*/);
+static int unicode_isupper(uint32_t /*ch*/);
+static int unicode_isxdigit(uint32_t /*ch*/);
+static int unicode_isident(uint32_t /*ch*/);
+static uint32_t unicode_tolower(uint32_t /*ch*/);
+static uint32_t unicode_toupper(uint32_t /*ch*/);
+
+__END_DECLS
+
+#ifndef __printflike
+#define	__printflike(n, m)		__attribute__((format(printf,n,m)))
+#endif
+#ifndef howmany
+#define howmany(x, y)			(((x)+((y)-1))/(y))
+#endif
+
+/* create a new token */
+static retoken_t *
+newtok(int type, retoken_t *left, retoken_t *right, uint8_t icase)
+{
+	retoken_t	*token;
+	
+	if ((token = calloc(1, sizeof(*token))) != NULL) {
+		token->type = type;
+		token->left = left;
+		token->right = right;
+		token->icase = icase;
+	}
+	return token;
+}
+
+/* how many instructions does tok need? */
+static int
+count(retoken_t *tok, int *errc)
+{
+	switch(tok->type) {
+	case TOK_ALT:
+		return 2 + count(tok->left, errc) + count(tok->right, errc);
+	case TOK_BACKREF:
+	case TOK_COLLECTION:
+	case TOK_DOT:
+	case TOK_LIT:
+	case TOK_WIDTH:
+	case TOK_ZEROWIDTH:
+		return 1;
+	case TOK_FORGET:
+		return 0;
+	case TOK_CAT:
+		return count(tok->left, errc) + count(tok->right, errc);
+	case TOK_PAREN:
+	case TOK_STAR:
+		return 2 + count(tok->left, errc);
+	case TOK_PLUS:
+	case TOK_QUEST:
+		return 1 + count(tok->left, errc);
+	default:
+		*errc = 1;
+		return 0;
+	}
+}
+
+/* make a new set */
+static inline set_t *
+newset(re_t *re)
+{
+	set_t	*set;
+
+	if (re->setc == re->maxset) {
+		set = realloc(re->sets, (re->maxset + 8) * sizeof(*set));
+		if (set == NULL) {
+			//error_message(in, "can't extend sets past %u", in->re->maxset);
+			return NULL;
+		}
+		memset(&set[re->setc], 0x0, 8 * sizeof(*set));
+		re->maxset += 8;
+		re->sets = set;
+	}
+	return &re->sets[re->setc++];
+}
+
+/* take parse tokens, and emit vm instructions */
+static int
+emit(re_t *re, retoken_t *tok)
+{
+	instr_t	*p1;
+	instr_t	*p2;
+	instr_t	*t;
+	set_t	*set;
+
+	switch(tok->type) {
+	case TOK_ALT:
+		re->pc->op = OP_SPLIT;
+		p1 = re->pc++;
+		p1->lhs = re->pc;
+		emit(re, tok->left);
+		re->pc->op = OP_JUMP;
+		p2 = re->pc++;
+		p1->rhs = re->pc;
+		emit(re, tok->right);
+		p2->lhs = re->pc;
+		return 1;
+	case TOK_BACKREF:
+		re->pc->op = OP_BACKREF;
+		re->pc->n = tok->ch;
+		re->pc->icase = tok->icase;
+		re->pc++;
+		return 1;
+	case TOK_CAT:
+		emit(re, tok->left);
+		emit(re, tok->right);
+		return 1;
+	case TOK_COLLECTION:
+		re->pc->op = OP_SET;
+		re->pc->n = tok->ch;
+		re->pc->icase = tok->icase;
+		re->pc++;
+		return 1;
+	case TOK_DOT:
+		re->pc++->op = OP_ANY;
+		return 1;
+	case TOK_LIT:
+		re->pc->op = OP_CHAR;
+		re->pc->n = tok->ch;
+		re->pc->icase = tok->icase;
+		re->pc++;
+		return 1;
+	case TOK_PAREN:
+		re->pc->op = OP_SAVE;
+		re->pc->n = 2*tok->nparens;
+		re->pc++;
+		emit(re, tok->left);
+		re->pc->op = OP_SAVE;
+		re->pc->n = 2*tok->nparens + 1;
+		re->pc++;
+		return 1;
+	case TOK_PLUS:
+		p1 = re->pc;
+		emit(re, tok->left);
+		re->pc->op = OP_SPLIT;
+		re->pc->lhs = p1;
+		p2 = re->pc;
+		re->pc++;
+		p2->rhs = re->pc;
+		if (tok->lazy) {
+			t = p2->lhs;
+			p2->lhs = p2->rhs;
+			p2->rhs = t;
+		}
+		return 1;
+	case TOK_QUEST:
+		re->pc->op = OP_SPLIT;
+		p1 = re->pc++;
+		p1->lhs = re->pc;
+		emit(re, tok->left);
+		p1->rhs = re->pc;
+		if (tok->lazy) {
+			t = p1->lhs;
+			p1->lhs = p1->rhs;
+			p1->rhs = t;
+		}
+		return 1;
+	case TOK_STAR:
+		re->pc->op = OP_SPLIT;
+		p1 = re->pc++;
+		p1->lhs = re->pc;
+		emit(re, tok->left);
+		re->pc->op = OP_JUMP;
+		re->pc->lhs = p1;
+		re->pc++;
+		p1->rhs = re->pc;
+		if (tok->lazy) {
+			t = p1->lhs;
+			p1->lhs = p1->rhs;
+			p1->rhs = t;
+		}
+		return 1;
+	case TOK_WIDTH:
+		re->pc->op = OP_SET;
+		re->pc->n = re->setc;
+		re->pc->icase = tok->icase;
+		if ((set = newset(re)) == NULL) {
+			return 0;
+		}
+		set_add_callback(set,
+			(tok->ch == 'd' || tok->ch == 'D') ? unicode_isdigit :
+			(tok->ch == 'p' || tok->ch == 'P') ? unicode_isprint :
+			(tok->ch == 's' || tok->ch == 'S') ? unicode_isspace :
+								unicode_isident,
+			unicode_isupper(tok->ch));
+		re->pc++;
+		return 1;
+	case TOK_ZEROWIDTH:
+		re->pc->op = OP_ZEROWIDTH;
+		re->pc->n = tok->ch;
+		re->pc++;
+		return 1;
+	}
+	return 1;
+}
+
+/* recursive function to free a token and its children */
+static void
+freetok(retoken_t *tok)
+{
+	if (tok) {
+		freetok(tok->left);
+		freetok(tok->right);
+		free(tok);
+	}
+}
+
+/* recursive function to print a token and its children */
+static void
+print_token(retoken_t *tok)
+{
+	if (tok->icase) {
+		printf("Icase+");
+	}
+	switch(tok->type) {
+	case TOK_ALT:
+		printf("Alt(");
+		print_token(tok->left);
+		printf(", ");
+		print_token(tok->right);
+		printf(")");
+		break;
+	case TOK_BACKREF:
+		printf("Backref(%d)", tok->ch);
+		break;
+	case TOK_CAT:
+		printf("Cat(");
+		print_token(tok->left);
+		printf(", ");
+		print_token(tok->right);
+		printf(")");
+		break;
+	case TOK_COLLECTION:
+		printf("Set(%u)", tok->ch);
+		break;
+	case TOK_DOT:
+		printf("Dot");
+		break;
+	case TOK_LIT:
+		printf("Lit(%c)", tok->ch);
+		break;
+	case TOK_PAREN:
+		printf("Paren(%d, ", tok->nparens);
+		print_token(tok->left);
+		printf(")");
+		break;
+	case TOK_PLUS:
+		if (tok->lazy) {
+			printf("Lazy");
+		}
+		printf("Plus(");
+		print_token(tok->left);
+		printf(")");
+		break;
+	case TOK_QUEST:
+		if (tok->lazy) {
+			printf("Lazy");
+		}
+		printf("Quest(");
+		print_token(tok->left);
+		printf(")");
+		break;
+	case TOK_STAR:
+		if (tok->lazy) {
+			printf("Lazy");
+		}
+		printf("Star(");
+		print_token(tok->left);
+		printf(")");
+		break;
+	case TOK_WIDTH:
+		printf("Width(%c)", tok->ch);
+		break;
+	case TOK_ZEROWIDTH:
+		printf("Zerowidth(%c)", tok->ch);
+		break;
+	default:
+		printf("???");
+		break;
+	}
+}
+
+/* print all the instructions in the program */
+static int
+printprog(re_t *re)
+{
+	uint32_t	 pc;
+	instr_t		*instr;
+	
+	for (pc = 0, instr = re->prog ; pc < re->instrc ; pc++, instr++) {
+		switch(instr->op) {
+		case OP_ANY:
+			printf("%2d. any\n", pc);
+			break;
+		case OP_BACKREF:
+			printf("%2d. backref %d\n", pc, instr->n);
+			break;
+		case OP_CHAR:
+			printf("%2d. char %c\n", pc, instr->n);
+			break;
+		case OP_JUMP:
+			printf("%2d. jmp %d\n",
+				pc,
+				(int)(instr->lhs - re->prog));
+			break;
+		case OP_MATCH:
+			printf("%2d. match\n", pc);
+			break;
+		case OP_SAVE:
+			printf("%2d. save %d\n", pc, instr->n);
+			break;
+		case OP_SET:
+			printf("%2d. set %d\n", pc, instr->n);
+			break;
+		case OP_SPLIT:
+			printf("%2d. split %d, %d\n",
+				pc,
+				(int)(instr->lhs - re->prog),
+				(int)(instr->rhs - re->prog));
+			break;
+		case OP_ZEROWIDTH:
+			printf("%2d. zerowidth(%c)\n", pc, instr->n);
+			break;
+		default:
+			fprintf(stderr, "printprog - bad type %d\n", instr->op);
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static retoken_t *rec_repeat(input_t *in);
+
+/* add a warning message */
+static __printflike(2,3) void
+error_message(input_t *in, const char *fmt, ...)
+{
+	va_list	args;
+
+	if (in->msgc == 0) {
+		/* don't overwrite previous messages */
+		va_start(args, fmt);
+		in->msgc = vsnprintf(in->msg, sizeof(in->msg), fmt, args);
+		va_end(args);
+	}
+}
+
+/* check current character is as expected, then get next token */
+static int
+accept_char(input_t *in, uint32_t expected)
+{
+	int	ret;
+
+	ret = 1;
+	if (in->parse.ch != expected) {
+		error_message(in, "expected: '%c', got '%c'",
+			expected, in->s[in->c]);
+		ret = 0;
+	}
+	get_next_token(in);
+	return ret;
+}
+
+/* clone a token */
+static retoken_t *
+clone_token(input_t *in, retoken_t *orig)
+{
+	retoken_t	*token;
+
+	if ((token = newtok(orig->type, NULL, NULL, orig->icase)) == NULL) {
+		return NULL;
+	}
+	token->lazy = orig->lazy;
+	token->nparens = orig->nparens;
+	token->ch = orig->ch;
+	if (orig->left) {
+		token->left = clone_token(in, orig->left);
+	}
+	if (orig->right) {
+		token->right = clone_token(in, orig->right);
+	}
+	return token;
+}
+
+/* concat two subexpressions together */
+static retoken_t *
+rec_concat(input_t *in, retoken_t *token)
+{
+	retoken_t	*rhs;
+	retoken_t	*cat;
+
+	while (in->parse.type != TOK_EOL && in->parse.type != TOK_ALT && in->parse.ch != ')') {
+		if ((rhs = rec_repeat(in)) == NULL ||
+		    (cat = newtok(TOK_CAT, token, rhs, 0)) == NULL ||
+		    (token = rec_concat(in, cat)) == NULL) {
+			return NULL;
+		}
+	}
+	return token;
+}
+
+/* recognise an alternate */
+static retoken_t *
+rec_alt(input_t *in)
+{
+	retoken_t	*repeat;
+	retoken_t	*token;
+	retoken_t	*cat;
+	retoken_t	*alt;
+
+	if ((repeat = rec_repeat(in)) == NULL ||
+	    (token = rec_concat(in, repeat)) == NULL) {
+		return NULL;
+	}
+	while (in->parse.type == TOK_ALT) {
+		if (!accept_char(in, '|')) {
+			return NULL;
+		}
+		if ((repeat = rec_repeat(in)) == NULL ||
+		    (cat = rec_concat(in, repeat)) == NULL ||
+		    (alt = newtok(TOK_ALT, token, cat, 0)) == NULL ||
+		    (token = rec_concat(in, alt)) == NULL) {
+			return NULL;
+		}
+	}
+	return token;
+}
+
+/* recognise a single atom */
+static retoken_t *
+rec_single(input_t *in)
+{
+	retoken_t	*token;
+	retoken_t	*alt;
+	int	 	 parenc;
+
+	token = NULL;
+	switch(in->parse.type) {
+	case TOK_PAREN:
+		accept_char(in, '(');
+		if (in->parse.noncapture) {
+			if ((token = rec_alt(in)) == NULL) {
+				return NULL;
+			}
+		} else {
+			parenc = ++in->parenc;
+			if ((alt = rec_alt(in)) == NULL ||
+			    (token = newtok(TOK_PAREN, alt, NULL, 0)) == NULL) {
+				return NULL;
+			}
+			token->nparens = parenc;
+		}
+		if (!accept_char(in, ')')) {
+			return NULL;
+		}
+		break;
+	case TOK_BACKREF:
+		if ((token = newtok(TOK_BACKREF, NULL, NULL, in->parse.icase)) == NULL) {
+			return NULL;
+		}
+		if (in->parse.ch > in->parenc) {
+			error_message(in, "backref %d out of range", in->ch);
+			return NULL;
+		}
+		accept_char(in, token->ch = in->parse.ch);
+		break;
+	case TOK_DOT:
+	case TOK_LIT:
+	case TOK_COLLECTION:
+	case TOK_WIDTH:
+	case TOK_ZEROWIDTH:
+		if ((token = newtok(in->parse.type, NULL, NULL, in->parse.icase)) == NULL) {
+			return NULL;
+		}
+		token->ch = in->parse.ch;
+		token->icase = in->parse.icase;
+		get_next_token(in);
+		break;
+	}
+	return token;
+}
+
+/* a repetition of subexpressions */
+static retoken_t *
+rec_repeat(input_t *in)
+{
+	retoken_t	*cloned;
+	retoken_t	*quest;
+	retoken_t	*token;
+	retoken_t	*orig;
+	uint32_t	 i;
+
+	if ((token = rec_single(in)) == NULL) {
+		return NULL;
+	}
+	orig = token;
+	switch(in->parse.type) {
+	case TOK_STAR:
+	case TOK_PLUS:
+	case TOK_QUEST:
+		if ((token = newtok(in->parse.type, token, NULL, in->parse.icase)) == NULL) {
+			return NULL;
+		}
+		token->lazy = in->parse.lazy;
+		get_next_token(in);
+		break;
+	case TOK_REP:
+		if (in->parse.lo == 0 && in->parse.hi == 0) {
+			orig->type = TOK_FORGET;
+		}
+		for (i = 1 ; i < in->parse.lo ; i++) {
+			if ((cloned = clone_token(in, orig)) == NULL ||
+			    (token = newtok(TOK_CAT, token, cloned, in->parse.icase)) == NULL) {
+				return NULL;
+			}
+		}
+		if (in->parse.hi == AT_LEAST) {
+			if ((token = newtok(TOK_PLUS, token, NULL, in->parse.icase)) == NULL) {
+				return NULL;
+			}
+		} else {
+			for ( ; i < in->parse.hi ; i++) {
+				if ((cloned = clone_token(in, orig)) == NULL ||
+				    (quest = newtok(TOK_QUEST, cloned, NULL, in->parse.icase)) == NULL ||
+				    (token = newtok(TOK_CAT, token, quest, in->parse.icase)) == NULL) {
+					return NULL;
+				}
+			}
+		}
+		get_next_token(in);
+		break;
+	}
+	return token;
+}
+
+/* recognise literal tokens in nospec expression */
+static retoken_t *
+nospec_token(input_t *in)
+{
+	retoken_t	*tok;
+
+	tok = newtok(TOK_LIT, NULL, NULL, 0);
+	tok->ch = in->s[in->c++]; 
+	if (in->c == in->eo) {
+		return tok;
+	}
+	return newtok(TOK_CAT, tok, nospec_token(in), 0);
+}
+
+/* either start of line, or start of nested subexpression */
+#define BASIC_IS_START(i, in)	\
+	((i) == (in) || ((size_t)(i - in) >= 2 && (i)[-2] == '\\' && (i)[-1] == '('))
+
+#define BASIC_IS_END(i, in, sz) \
+	(((size_t)((i) - (in)) == (sz) - 1) || ((i)[1] == '\\' && (i)[2] == ')'))
+
+/* convert a basic regexp into an extended one */
+static ssize_t
+regextend(const char *in, size_t insize, char *out, size_t outsize, uint32_t flags)
+{
+	const char	*i;
+	char		*o;
+
+	for (i = in, o = out ; (size_t)(i - in) < insize && (size_t)(o - out) < outsize ; i++) {
+		switch(*i) {
+		case '+':
+		case '|':
+		case '?':
+		case '{':
+		case '}':
+		case '(':
+		case ')':
+			/* +, |, ?, {, }, (, ) are ordinary characters */
+			*o++ = '\\';
+			*o++ = *i;
+			break;
+		case '\\':
+			switch(i[1]) {
+			case '(':
+			case ')':
+			case '{':
+			case '}':
+				/* basic bounds delimiters are \{ and \} */
+				/* basic nested subexpressions are \( and \) */
+				*o++ = *++i;
+				break;
+			default:
+				/* pass through other backslash-escaped chars */
+				*o++ = *i;
+				break;
+			}
+			break;
+		case '^':
+		case '$':
+			/* ^ is an ordinary char, except at start */
+			/* $ is an ordinary char, except at end */
+			if ((*i == '^' && !BASIC_IS_START(i, in)) ||
+			    (*i == '$' && !BASIC_IS_END(i, in, insize))) {
+				*o++ = '\\';
+			}
+			*o++ = *i;
+			break;
+		case '*':
+			if (flags & AGCRE_REG_CTAGS) {
+				/* * is an ordinary character - see ctags */
+				*o++ = '\\';
+				*o++ = *i;
+			} else {
+				/* THIS IS WHAT THE RE_FORMAT PAGE SAYS FOR '*'
+				 not for ctags, though */
+				if ((*i == '*' && BASIC_IS_START(i, in))) {
+					*o++ = '\\';
+				}
+				*o++ = *i;
+			}
+			break;
+		default:
+			*o++ = *i;
+			break;
+		}
+	}
+	*o = 0x0;
+	return (ssize_t)(o - out);
+}
+
+/* parse the string */
+static retoken_t *
+parse_string(input_t *in)
+{
+	retoken_t	*dotstar;
+	retoken_t	*token;
+	retoken_t	*cat;
+	retoken_t	*alt;
+
+	get_next_token(in);
+	if ((alt = rec_alt(in)) == NULL) {
+		return NULL;
+	}
+	/* make pattern ".*?(PATTERN)" */
+	if ((token = newtok(TOK_PAREN, alt, NULL, 0)) == NULL ||
+	    (cat = newtok(TOK_DOT, NULL, NULL, 0)) == NULL ||
+	    (dotstar = newtok(TOK_STAR, cat, NULL, 0)) == NULL) {
+		return NULL;
+	}
+	dotstar->lazy = 1;
+	return newtok(TOK_CAT, dotstar, token, 0);
+}
+
+/* parse an extended reg exp */
+static retoken_t *
+parse_extended(agcre_regex_t *agcre, input_t *in)
+{
+	retoken_t	*entry;
+	re_t		*re;
+
+	re = agcre->re_g;
+	if ((entry = parse_string(in)) == 0) {
+		memcpy(re->msg, in->msg, in->msgc);
+		return 0;
+	}
+	agcre->re_nsub = in->parenc + 1;
+	return entry;
+}
+
+/* parse a nospec regexp */
+static retoken_t *
+parse_nospec(agcre_regex_t *agcre, input_t *in)
+{
+	retoken_t	*dotstar;
+	retoken_t	*tok;
+
+	tok = nospec_token(in);
+	tok = newtok(TOK_PAREN, tok, NULL, 0);
+	dotstar = newtok(TOK_STAR, newtok(TOK_DOT, NULL, NULL, 0), NULL, 0);
+	dotstar->lazy = 1;
+	tok = newtok(TOK_CAT, dotstar, tok, 0);
+	agcre->re_nsub = in->parenc + 1;
+	return tok;
+}
+
+/* convert basic regexp to extended then parse */
+static retoken_t *
+parse_basic(agcre_regex_t *agcre, input_t *in, uint32_t flags)
+{
+	ssize_t	size;
+	char	s[AGCRE_MAX_EXPR_LENGTH * 4];	
+
+	size = regextend(&in->s[in->so], in->eo - in->so, s, sizeof(s), flags);
+	in->s = s;
+	in->so = 0;
+	in->eo = size;
+	return parse_extended(agcre, in);
+}
+
+#define BOL	0xffffffffU
+
+/* unwrap a character, including UTF-8 chars, into 32bit char */
+static inline int
+unwrap_utf8(input_t *in)
+{
+	uint32_t	n;
+	uint32_t	j;
+
+	in->ch = in->s[in->c];
+	if (in->ch <= 0x7f) {
+		/* 0XXX XXXX one byte */
+		in->bytes = 1;
+		n = in->ch;
+	} else if ((in->ch & 0xe0) == 0xc0) {
+		/* 110X XXXX 2 bytes */
+		in->bytes = 2;
+		n = (in->ch & 0x1f);
+	} else if ((in->ch & 0xf0) == 0xe0) {
+		/* 1110 XXXX three bytes */
+		in->bytes = 3;
+		n = (in->ch & 0x0f);
+	} else if ((in->ch & 0xf8) == 0xf0) {
+		/* 1111 0XXX four bytes */
+		in->bytes = 4;
+		n = (in->ch & 0x07);
+	} else if ((in->ch & 0xfc) == 0xf8) {
+		/* 1111 10XX five bytes */
+		in->bytes = 5;
+		n = (in->ch & 0x03);
+	} else if ((in->ch & 0xfe) == 0xfc) {
+		/* 1111 110X six bytes */
+		in->bytes = 6;
+		n = (in->ch & 0x01);
+	} else {
+		return 0;
+	}
+	if (in->c + in->bytes - 1 > in->eo) {
+		return 0;
+	}
+	for (j = 1 ; j < in->bytes ; j++) {
+		n = (n << 6) | (in->s[in->c + j] & 0x3f);
+	}
+	in->ch = n;
+	return 1;
+}
+
+/* return 1 if host is bigendian */
+static inline int
+host_is_bigendian(void)
+{
+	int indian = 1;
+
+	return (*(char *)(void *)&indian) ? 0 : 1;
+}
+
+/* swap 32bit integer */
+static inline uint32_t
+swap32(uint32_t x)
+{
+	x = ((x & 0x0000ffff) << 16) | ((x & 0xffff0000) >> 16);
+	x = ((x & 0x00ff00ff) << 8) | ((x & 0xff00ff00) >> 8);
+	return x;
+}
+
+/* swap 16bit integer */
+static inline uint32_t
+swap16(uint16_t x)
+{
+	return ((x & 0x00ff) << 8) | ((x & 0xff00) >> 8);
+}
+
+/* higher level get-next-char function */
+static inline int
+get_next_char(input_t *in, uint32_t flags)
+{
+	uint32_t	i32;
+	uint16_t	i16;
+
+	in->prevch = in->ch;
+	in->prevbytes = in->bytes;
+	switch(flags & AGCRE_IN_WIDTH) {
+	case AGCRE_REG_UTF16LE:
+		in->bytes = 1;
+		memcpy(&i16, &in->s[in->c * 2], 2);
+		in->ch = (host_is_bigendian()) ? swap16(i16) : i16;
+		return 1;
+	case AGCRE_REG_UTF16BE:
+		in->bytes = 1;
+		memcpy(&i16, &in->s[in->c * 2], 2);
+		in->ch = (!host_is_bigendian()) ? swap16(i16) : i16;
+		return 1;
+	case AGCRE_REG_UTF32LE:
+		in->bytes = 1;
+		memcpy(&i32, &in->s[in->c * 4], 4);
+		in->ch = (host_is_bigendian()) ? swap32(i32) : i32;
+		return 1;
+	case AGCRE_REG_UTF32BE:
+		in->bytes = 1;
+		memcpy(&i32, &in->s[in->c * 4], 4);
+		in->ch = (!host_is_bigendian()) ? swap32(i32) : i32;
+		return 1;
+	default:
+		return unwrap_utf8(in);
+	}
+}
+
+/* return the number of bytes in a context */
+static inline uint32_t
+context_bytes(uint32_t n)
+{
+	return sizeof(context_t) + (n * sizeof(agcre_regmatch_t));
+}
+
+/* create a new context */
+static context_t *
+make_new_context(re_t *re, int n)
+{
+	context_t	*ctx;
+	
+	if ((ctx = re->ctxlist) != NULL) {
+		re->ctxlist = ctx->next;
+		memset(ctx, 0x0, context_bytes(n));
+	} else {
+		ctx = calloc(1, context_bytes(n));
+	}
+	ctx->nsub = n;
+	ctx->ref = 1;
+	return ctx;
+}
+
+/* increment reference count in the context */
+static inline context_t *
+incref(context_t *ctx)
+{
+	ctx->ref += 1;
+	return ctx;
+}
+
+/* save the current offset in the match context */
+static context_t *
+mark(re_t *re, context_t *ctx, int i, input_t *in, int delta)
+{
+	context_t	*newctx;
+	uint32_t	 j;
+
+	if (ctx->ref > 1) {
+		newctx = make_new_context(re, ctx->nsub);
+		for (j = 0; j < ctx->nsub; j++) {
+			newctx->sub[j] = ctx->sub[j];
+		}
+		ctx->ref -= 1;
+		ctx = newctx;
+	}
+	if (i % 2 == 0) {
+		ctx->sub[i / 2].rm_so = in->c + delta;
+	} else {
+		ctx->sub[i / 2].rm_eo = in->c + delta;
+	}
+	return ctx;
+}
+
+/* decrement reference count in the context */
+static inline void
+decref(re_t *re, context_t *ctx)
+{
+	if (--ctx->ref == 0) {
+		ctx->next = re->ctxlist;
+		re->ctxlist = ctx;
+	}
+}
+
+/* create a new thread */
+static re_thread_t
+newthread(instr_t *pc, context_t *ctx)
+{
+	re_thread_t	t = {pc, ctx};
+
+	return t;
+}
+
+/* create a new thread list */
+static threadlist_t *
+threadlist(uint32_t instrc)
+{
+	return calloc(1, sizeof(threadlist_t) + (instrc * sizeof(re_thread_t)));
+}
+
+/* add a new thread to the list */
+static int
+addthread(re_t *re, threadlist_t *list, re_thread_t t, input_t *in, int delta, int width)
+{
+	context_t	*ctx;
+
+	if (width) {
+		/* we don't check generation for 0-width matches */
+		if (t.pc->gen == re->gen) {
+			decref(re, t.ctx);
+			return 1;
+		}
+		t.pc->gen = re->gen;
+	}
+	switch(t.pc->op) {
+	case OP_JUMP:
+		addthread(re, list, newthread(t.pc->lhs, t.ctx), in, delta, width);
+		break;
+	case OP_SPLIT:
+		addthread(re, list, newthread(t.pc->lhs, incref(t.ctx)), in, delta, width);
+		addthread(re, list, newthread(t.pc->rhs, t.ctx), in, delta, width);
+		break;
+	case OP_SAVE:
+		if ((ctx = mark(re, t.ctx, t.pc->n, in, delta)) == NULL) {
+			return 0;
+		}
+		addthread(re, list, newthread(t.pc+1, ctx), in, delta, width);
+		break;
+	default:
+		list->t[list->nthreads++] = t;
+		break;
+	}
+	return 1;
+}
+
+/* return 1 at beginning of line */
+static inline int
+isbegline(re_t *re, input_t *in)
+{
+	return (re->flags & AGCRE_REG_NOTBOL) ? 0 : (in->prevch == '\n' || in->prevch == BOL);
+}
+
+/* return 1 at end of line */
+static inline int
+isendline(re_t *re, input_t *in)
+{
+	return (re->flags & AGCRE_REG_NOTEOL) ? 0 : in->c == in->eo;
+}
+
+/* return 1 at beginning of words */
+static inline int
+isbegword(re_t *re, input_t *in)
+{
+	return (isbegline(re, in) || !unicode_isident(in->prevch)) &&
+		unicode_isident(in->ch);
+}
+
+/* return 1 at end of words */
+static inline int
+isendword(re_t *re, input_t *in)
+{
+	return (isendline(re, in) || (in->c > in->so && unicode_isident(in->prevch))) &&
+		(!unicode_isident(in->ch));
+}
+
+/* do the chars match? */
+static inline int
+charmatch(re_t *re, instr_t *instr, uint32_t ch, uint32_t wanted)
+{
+	if ((re->flags & AGCRE_REG_ICASE) || instr->icase) {
+		return unicode_tolower(ch) == unicode_tolower(wanted);
+	}
+	return (ch == wanted);
+}
+
+/* can we match "any" character? */
+static inline int
+anymatch(re_t *re, input_t *in)
+{
+	if ((re->flags & AGCRE_REG_NEWLINE) && in->ch == '\n') {
+		return 0;
+	}
+	return (in->c < in->eo);
+}
+
+/* are the chars in the set? */
+static inline int
+setmatch(re_t *re, set_t *set, input_t *in, instr_t *instr)
+{
+	if ((re->flags & AGCRE_REG_ICASE) || instr->icase) {
+		return set_isset(set, unicode_tolower(in->ch)) ||
+			set_isset(set, unicode_toupper(in->ch));
+	}
+	return set_isset(set, in->ch);
+}
+
+/* is this a zerowidth match? */
+static inline int
+zeromatch(re_t *re, uint32_t type, input_t *in)
+{
+	switch(type) {
+	case '^':
+		return isbegline(re, in);
+	case '$':
+		return isendline(re, in);
+	case '<':
+		return isbegword(re, in);
+	case '>':
+		return isendword(re, in);
+	case 'b':
+		return isbegword(re, in) || isendword(re, in);
+	}
+	return 0;
+}
+
+/* we take the leftmost longest match */
+static inline int
+is_better_match(context_t *ctx, context_t *prev)
+{
+	agcre_regoff_t	prevlen;
+	agcre_regoff_t	ctxlen;
+
+	prevlen = (prev) ? prev->sub[0].rm_eo - prev->sub[0].rm_so : -1;
+	ctxlen = ctx->sub[0].rm_eo - ctx->sub[0].rm_so;
+	if (ctxlen > prevlen) {
+		return 1;
+	}
+	return (ctxlen == prevlen) ? (ctx->sub[0].rm_so < prev->sub[0].rm_so) : 0;
+}
+
+/* get length of utf16 string in 16bit chars */
+static inline size_t 
+strlen16(const uint16_t *s)
+{
+	uint32_t	i;
+
+	for (i = 0 ; s[i] != 0x0000 ; i++) {
+	}
+	return i;
+}
+
+/* get length of utf32 string in 32bit chars */
+static inline size_t 
+strlen32(const uint32_t *s)
+{
+	uint32_t	i;
+
+	for (i = 0 ; s[i] != 0 ; i++) {
+	}
+	return i;
+}
+
+/* get length of string in terms of bytes */
+static inline uint32_t
+get_length_in_bytes(const void *vs, uint32_t flags)
+{
+	switch(flags & AGCRE_IN_WIDTH) {
+	case AGCRE_REG_UTF16LE:
+	case AGCRE_REG_UTF16BE:
+		return strlen16(vs) * 2;
+	case AGCRE_REG_UTF32LE:
+	case AGCRE_REG_UTF32BE:
+		return strlen32(vs) * 4;
+	default:
+		return strlen(vs);
+	}
+}
+
+/* free all contexts */
+static inline void
+free_contexts(context_t *ctx)
+{
+	context_t	*next;
+
+	for ( ; ctx ; ctx = next) {
+		next = ctx->next;
+		free(ctx);
+	}
+}
+
+/* unwrap a character, including UTF-8 chars, into 32bit char */
+static inline int
+lex_unwrap_utf8(input_t *in)
+{
+	uint32_t	n;
+	uint32_t	j;
+
+	in->ch = in->s[in->c];
+	if (in->ch <= 0x7f) {
+		/* 0XXX XXXX one byte */
+		in->bytes = 1;
+		n = in->ch;
+	} else if ((in->ch & 0xe0) == 0xc0) {
+		/* 110X XXXX 2 bytes */
+		in->bytes = 2;
+		n = (in->ch & 0x1f);
+	} else if ((in->ch & 0xf0) == 0xe0) {
+		/* 1110 XXXX three bytes */
+		in->bytes = 3;
+		n = (in->ch & 0x0f);
+	} else if ((in->ch & 0xf8) == 0xf0) {
+		/* 1111 0XXX four bytes */
+		in->bytes = 4;
+		n = (in->ch & 0x07);
+	} else if ((in->ch & 0xfc) == 0xf8) {
+		/* 1111 10XX five bytes */
+		in->bytes = 5;
+		n = (in->ch & 0x03);
+	} else if ((in->ch & 0xfe) == 0xfc) {
+		/* 1111 110X six bytes */
+		in->bytes = 6;
+		n = (in->ch & 0x01);
+	} else {
+		return 0;
+	}
+	if (in->c + in->bytes - 1 > in->eo) {
+		return 0;
+	}
+	for (j = 1 ; j < in->bytes ; j++) {
+		n = (n << 6) | (in->s[in->c + j] & 0x3f);
+	}
+	in->ch = n;
+	in->c += in->bytes;
+	return 1;
+}
+
+/* return 1 if host is bigendian */
+static inline int
+lex_host_is_bigendian(void)
+{
+	int indian = 1;
+
+	return (*(char *)(void *)&indian) ? 0 : 1;
+}
+
+/* swap 32bit integer */
+static inline uint32_t
+lex_swap32(uint32_t x)
+{
+	x = ((x & 0x0000ffff) << 16) | ((x & 0xffff0000) >> 16);
+	x = ((x & 0x00ff00ff) << 8) | ((x & 0xff00ff00) >> 8);
+	return x;
+}
+
+/* swap 16bit integer */
+static inline uint32_t
+lex_swap16(uint16_t x)
+{
+	return ((x & 0x00ff) << 8) | ((x & 0xff00) >> 8);
+}
+
+/* return the next char */
+static inline int
+in_nextchar(input_t *in)
+{
+	uint32_t	i32;
+	uint16_t	i16;
+
+	if (in->c == in->eo) {
+		return 0;
+	}
+	in->prevch = in->ch;
+	in->prevbytes = in->bytes;
+	switch(in->re->flags & AGCRE_IN_WIDTH) {
+	case AGCRE_REG_UTF16LE:
+		in->bytes = 2;
+		memcpy(&i16, &in->s[in->c], in->bytes);
+		in->ch = (lex_host_is_bigendian()) ? lex_swap16(i16) : i16;
+		in->c += in->bytes;
+		return 1;
+	case AGCRE_REG_UTF16BE:
+		in->bytes = 2;
+		memcpy(&i16, &in->s[in->c], in->bytes);
+		in->ch = (!lex_host_is_bigendian()) ? lex_swap16(i16) : i16;
+		in->c += in->bytes;
+		return 1;
+	case AGCRE_REG_UTF32LE:
+		in->bytes = 4;
+		memcpy(&i32, &in->s[in->c], in->bytes);
+		in->ch = (lex_host_is_bigendian()) ? lex_swap32(i32) : i32;
+		in->c += in->bytes;
+		return 1;
+	case AGCRE_REG_UTF32BE:
+		in->bytes = 4;
+		memcpy(&i32, &in->s[in->c], in->bytes);
+		in->ch = (!lex_host_is_bigendian()) ? lex_swap32(i32) : i32;
+		in->c += in->bytes;
+		return 1;
+	default:
+		return lex_unwrap_utf8(in);
+	}
+}
+
+/* look ahead */
+static inline uint32_t
+in_peek(input_t *in, int32_t delta)
+{
+	return in->s[in->c + delta];
+}
+
+/* keep an error message, only if we don't have one already */
+static int
+lex_error_message(input_t *in, const char *fmt, ...)
+{
+	va_list	args;
+
+	if (in->msgc == 0) {
+		va_start(args, fmt);
+		in->msgc = vsnprintf(in->msg, sizeof(in->msg), fmt, args);
+		va_end(args);
+	}
+	return TOK_BROKEN;
+}
+
+/* do we have a lazy marker? */
+static inline void
+lazy(input_t *in, retoken_t *token)
+{
+	if (in_peek(in, 0) == '?') {
+		in_nextchar(in);
+		token->lazy = 1;
+	}
+}
+
+/* get a number, up to width digits wide */
+static inline int
+get_number(input_t *in, uint32_t *num, uint32_t width, uint32_t base)
+{
+	static const char	*hex = "0123456789abcdef";
+	const char		*p;
+	uint32_t		 i;
+
+	for (*num = 0, i = 0 ; i < width ; i++) {
+		if ((p = strchr(hex, in_peek(in, i))) == NULL ||
+				(uint32_t)(p - hex) >= base) {
+			break;
+		}
+		*num = (*num * base) + (uint32_t)(p - hex);
+	}
+	if (i > width) {
+		return 0;
+	}
+	in->c += i;
+	return 1;
+}
+
+/* recognise a specific, bounded repeat */
+static inline int
+rep(input_t *in)
+{
+	retoken_t	*parse;
+
+	parse = &in->parse;
+	if (!get_number(in, &parse->lo, 4, 10)) {
+		return lex_error_message(in, "number too large '%.6s", &in->s[parse->so]);
+	}
+	parse->hi = parse->lo;
+	if (in_peek(in, 0) == ',') {
+		in_nextchar(in);
+		if (!get_number(in, &parse->hi, 4, 10)) {
+			return lex_error_message(in, "number too large '%.6s", &in->s[parse->so]);
+		}
+		if (parse->hi == 0) {
+			parse->hi = AT_LEAST;
+		}
+	}
+	if (parse->lo > parse->hi) {
+		return lex_error_message(in, "low %u > hi %u", parse->lo, parse->hi);
+	}
+	in_nextchar(in);
+	if (in->ch != '}') {
+		return lex_error_message(in, "expected } at end of repeat", &in->s[parse->so]);
+	}
+	return TOK_REP;
+}
+
+/* recognise a backslash sequence */
+static int
+rec_backslash(input_t *in, retoken_t *token, int want_backrefs)
+{
+	in_nextchar(in);
+	switch(token->ch = in->ch) {
+	case 'u':
+		if (!get_number(in, &token->ch, 4, 16)) {
+			return lex_error_message(in, "bad unicode character '%.6s", &in->s[token->so]);
+		}
+		break;
+	case 'n':
+		token->ch = '\n';
+		return TOK_LIT;
+	case 'r':
+		token->ch = '\r';
+		return TOK_LIT;
+	case 't':
+		token->ch = '\t';
+		return TOK_LIT;
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7': case '8': case '9':
+		in->c -= in->prevbytes;
+		if (want_backrefs && get_number(in, &token->ch, 2, 10)) {
+			return TOK_BACKREF;
+		}
+		break;
+	default:
+		if (strchr("<>b", token->ch) != NULL) {
+			return TOK_ZEROWIDTH;
+		}
+		if (strchr("DPSWdpsw", token->ch) != NULL) {
+			return TOK_WIDTH;
+		}
+		break;
+	}
+	return TOK_LIT;
+}
+
+/* function to calculate a hash, used in stringswitch */
+static uint32_t
+djbhashn(const char *s, size_t len)
+{
+	uint32_t	hash;
+
+	for (hash = 5381; len-- > 0 ; ) {
+		hash = hash * 33 + *s++;
+	}
+	return hash + (hash >> 5);
+}
+
+/* recognise a posix class */
+static int
+rec_posix_class(input_t *in, set_t *set)
+{
+	char	*p;
+
+	if ((p = memchr(&in->s[in->c], ']', 10)) == NULL) {
+		return 0;
+	}
+	switch(djbhashn(&in->s[in->c], (size_t)(p - &in->s[in->c]) + 1)) {
+	case /* ":alnum:]" */ 0x32ef56fd:
+		set_add_callback(set, unicode_isalnum, 0);
+		in->c += 8;
+		return 1;
+	case /* ":alpha:]" */ 0x330d1aee:
+		set_add_callback(set, unicode_isalpha, 0);
+		in->c += 8;
+		return 1;
+	case /* ":blank:]" */ 0x815acf00:
+		set_add_callback(set, unicode_isblank, 0);
+		in->c += 8;
+		return 1;
+	case /* ":cntrl:]" */ 0xd6f15906:
+		set_add_callback(set, unicode_iscntrl, 0);
+		in->c += 8;
+		return 1;
+	case /* ":digit:]" */ 0x1154c1e2:
+		set_add_callback(set, unicode_isdigit, 0);
+		in->c += 8;
+		return 1;
+	case /* ":graph:]" */ 0xcb51807:
+		set_add_callback(set, unicode_isgraph, 0);
+		in->c += 8;
+		return 1;
+	case /* ":ident:]" */ 0x8a1572f1:
+		set_add_callback(set, unicode_isident, 0);
+		in->c += 8;
+		return 1;
+	case /* ":lower:]" */ 0x8bfc6af8:
+		set_add_callback(set, unicode_islower, 0);
+		in->c += 8;
+		return 1;
+	case /* ":print:]" */ 0xc7bbf92b:
+		set_add_callback(set, unicode_isprint, 0);
+		in->c += 8;
+		return 1;
+	case /* ":punct:]" */ 0xcf4a84ca:
+		set_add_callback(set, unicode_ispunct, 0);
+		in->c += 8;
+		return 1;
+	case /* ":space:]" */ 0xa876bcf2:
+		set_add_callback(set, unicode_isspace, 0);
+		in->c += 8;
+		return 1;
+	case /* ":upper:]" */ 0x40541fdb:
+		set_add_callback(set, unicode_isupper, 0);
+		in->c += 8;
+		return 1;
+	case /* ":xdigit:]" */ 0x24758e84:
+		set_add_callback(set, unicode_isxdigit, 0);
+		in->c += 9;
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* recognise a set */
+static int
+rec_set(input_t *in)
+{
+	const int	 nobackrefs = 0;
+	retoken_t	*token;
+	retoken_t	 bs;
+	uint32_t	 prevch;
+	set_t		*set;
+	int		 complement;
+	int		 done;
+	int		 ch;
+
+	if (memcmp(&in->s[in->c], "[:<:]]", 6) == 0 ||
+	    memcmp(&in->s[in->c], "[:>:]]", 6) == 0) {
+		in->parse.ch = in->s[in->c + 2];
+		in->c += 6;
+		return TOK_ZEROWIDTH;
+	}
+	if (in->re->setc == in->re->maxset) {
+		set = realloc(in->re->sets, (in->re->maxset + 8) * sizeof(*set));
+		if (set == NULL) {
+			return lex_error_message(in, "can't extend sets past %u", in->re->maxset);
+		}
+		memset(&set[in->re->setc], 0x0, 8 * sizeof(*set));
+		in->re->maxset += 8;
+		in->re->sets = set;
+	}
+	set = &in->re->sets[in->parse.ch = in->re->setc++];
+	token = &in->parse;
+	complement = 0;
+	if (in_peek(in, 0) == '^') {
+		complement = 1;
+		set_complement(set);
+		in_nextchar(in);
+	}
+	for (done = 0, prevch = in->prevch ; !done && in_nextchar(in) ; ) {
+		switch(ch = in->ch) {
+		case '\\':
+			memset(&bs, 0x0, sizeof(bs));
+			switch (rec_backslash(in, &bs, nobackrefs)) {
+			case TOK_BROKEN:
+				return lex_error_message(in, "bad backslash '%.10s'", &in->s[token->so]);
+			case TOK_LIT:
+				set_add_case(set, bs.ch);
+				break;
+			case TOK_WIDTH:
+				set_add_callback(set,
+					(token->ch == 'd' || token->ch == 'D') ? unicode_isdigit :
+					(token->ch == 'p' || token->ch == 'P') ? unicode_isprint :
+					(token->ch == 's' || token->ch == 'S') ? unicode_isspace :
+										unicode_isident,
+					unicode_isupper((uint8_t)token->ch));
+				break;
+			}
+			break;
+		case '[':
+			if (!rec_posix_class(in, set)) {
+				return lex_error_message(in, "bad posix class '%.30s'", &in->s[token->so]);
+			}
+			break;
+		case ']':
+			if (in->c == token->so + complement + 2) {
+				set_add_case(set, ']');
+			} else {
+				done = 1;
+			}
+			break;
+		case '-':
+			if (in->c == token->so + complement + 2) {
+				set_add_case(set, '-');
+			} else {
+				prevch = in->prevch;
+				in_nextchar(in);
+				set_add_range(set, prevch, in->ch);
+			}
+			break;
+		default:
+			set_add_case(set, ch);
+			break;
+		}
+	}
+	if (in->ch != ']') {
+		return lex_error_message(in, "expected ']' at end of set: %s", &in->s[token->so]);
+	}
+	return TOK_SET;
+}
+
+/* lexer function */
+static int
+get_next_token(input_t *in)
+{
+	const int	 backrefs = 1;
+	retoken_t	*token = &in->parse;
+
+	memset(token, 0x0, sizeof(*token));
+	token->so = in->c;
+	if (!in_nextchar(in)) {
+		return token->type = TOK_EOL;
+	}
+	token->icase = in->icase;
+	switch(token->ch = in->ch) {
+	case '(':
+		if (in_peek(in, 0) == '?') {
+			if (in_peek(in, 1) == ':') {
+				in->c += 2;
+				token->noncapture = 1;
+			}
+			if (in_peek(in, 1) == 'i') {
+				in->c += 3;
+				in->icase = 1;
+				return get_next_token(in);
+			}
+			if (in_peek(in, 1) == '-' && in_peek(in, 2) == 'i') {
+				in->c += 4;
+				in->icase = 0;
+				return get_next_token(in);
+			}
+		}
+		return token->type = TOK_PAREN;
+	case ')':
+		return token->type = TOK_PAREN;
+	case '?':
+		lazy(in, token);
+		return token->type = TOK_QUEST;
+	case '*':
+		lazy(in, token);
+		return token->type = TOK_STAR;
+	case '+':
+		lazy(in, token);
+		return token->type = TOK_PLUS;
+	case '|':
+		return token->type = TOK_ALT;
+	case '{':
+		return token->type = rep(in);
+	case '^':
+	case '$':
+		return token->type = TOK_ZEROWIDTH;
+	case '[':
+		return token->type = rec_set(in);
+	case '.':
+		return token->type = TOK_DOT;
+	case '\\':
+		return token->type = rec_backslash(in, token, backrefs);
+	default:
+		return token->type = TOK_LIT;
+	}
+}
+
+/* free resources used by a set */
+static void
+set_free(set_t *set)
+{
+	free(set->ranges);
+	free(set->cases);
+	free(set->callbacks);
+}
+
+/* add a single 'case' to a set - a single member */
+static int
+set_add_case(set_t *set, uint32_t n)
+{
+	uint32_t	*v;
+
+	if (set->casec == set->maxcase) {
+		v = realloc(set->cases, (set->maxcase + 16) * sizeof(*v));
+		if (v == NULL) {
+			fprintf(stderr, "set_add_case: failure\n");
+			return 0;
+		}
+		set->maxcase += 16;
+		set->cases = v;
+	}
+	set->cases[set->casec++] = n;
+	return 1;
+}
+
+/* add a range to a set */
+static int
+set_add_range(set_t *set, uint32_t lo, uint32_t hi)
+{
+	uint32_t	*v;
+
+	if (set->rangec == set->maxrange) {
+		v = realloc(set->ranges, (set->maxrange + 16) * sizeof(*v));
+		if (v == NULL) {
+			fprintf(stderr, "set_add_range: failure\n");
+			return 0;
+		}
+		set->maxrange += 16;
+		set->ranges = v;
+	}
+	set->ranges[set->rangec++] = lo;
+	set->ranges[set->rangec++] = hi;
+	return 1;
+}
+
+/* add a callback function to a set */
+static int
+set_add_callback(set_t *set, int (*func)(uint32_t), int negate)
+{
+	cb_t	*v;
+
+	if (set->callbackc == set->maxcallback) {
+		v = realloc(set->callbacks, (set->maxcallback + 16) * sizeof(*v));
+		if (v == NULL) {
+			fprintf(stderr, "set_add_callback: failure\n");
+			return 0;
+		}
+		set->maxcallback += 16;
+		set->callbacks = v;
+	}
+	set->callbacks[set->callbackc].func = func;
+	set->callbacks[set->callbackc++].negate = negate;
+	return 1;
+}
+
+/* test if a number is present in the set */
+static int
+set_isset(set_t *set, uint32_t n)
+{
+	uint32_t	i;
+
+	for (i = 0 ; i < set->casec ; i++) {
+		if (set->cases[i] == n) {
+			return 1 - set->complement;
+		}
+	}
+	for (i = 0 ; i < set->rangec ; i += 2) {
+		if (n >= set->ranges[i] && n <= set->ranges[i + 1]) {
+			return 1 - set->complement;
+		}
+	}
+	for (i = 0 ; i < set->callbackc ; i += 2) {
+		if ((*set->callbacks[i].func)(n) == 1 - set->callbacks[i].negate) {
+			return 1 - set->complement;
+		}
+	}
+	return set->complement;
+}
+
+/* set the complement flag */
+static int
+set_complement(set_t *set)
+{
+	return set->complement = 1 - set->complement;
+}
+
+#define RANGE(a, b) do {						\
+	if (ch >= (a) && ch <= (b)) {					\
+		return 1;						\
+	}								\
+} while(/*CONSTCOND*/0)
+
+
+/* return 1 if ch has White Space property */
+static int
+unicode_isWhite_Space(uint32_t ch)
+{
+	switch(ch) {
+	case 0x0020:
+	case 0x0085:
+	case 0x00A0:
+	case 0x1680:
+	case 0x2028:
+	case 0x2029:
+	case 0x202F:
+	case 0x205F:
+	case 0x3000:
+		return 1;
+	}
+	RANGE(0x0009, 0x000D);
+	RANGE(0x2000, 0x200A);
+	return 0;
+}
+
+/* return 1 if ch has Dash property */
+static int
+unicode_isDash(uint32_t ch)
+{
+	switch(ch) {
+	case 0x002D:
+	case 0x058A:
+	case 0x05BE:
+	case 0x1400:
+	case 0x1806:
+	case 0x2053:
+	case 0x207B:
+	case 0x208B:
+	case 0x2212:
+	case 0x2E17:
+	case 0x2E1A:
+	case 0x2E40:
+	case 0x301C:
+	case 0x3030:
+	case 0x30A0:
+	case 0xFE58:
+	case 0xFE63:
+	case 0xFF0D:
+		return 1;
+	}
+	RANGE(0x2010, 0x2015);
+	RANGE(0x2E3A, 0x2E3B);
+	RANGE(0xFE31, 0xFE32);
+	return 0;
+}
+
+/* return 1 if ch has Hyphen property */
+static int
+unicode_isHyphen(uint32_t ch)
+{
+	switch(ch) {
+	case 0x002D:
+	case 0x00AD:
+	case 0x058A:
+	case 0x1806:
+	case 0x2E17:
+	case 0x30FB:
+	case 0xFE63:
+	case 0xFF0D:
+	case 0xFF65:
+		return 1;
+	}
+	RANGE(0x2010, 0x2011);
+	return 0;
+}
+
+/* return 1 if ch has Quotation Mark property */
+static int
+unicode_isQuotation_Mark(uint32_t ch)
+{
+	switch(ch) {
+	case 0x0022:
+	case 0x0027:
+	case 0x00AB:
+	case 0x00BB:
+	case 0x2018:
+	case 0x2019:
+	case 0x201A:
+	case 0x201D:
+	case 0x201E:
+	case 0x201F:
+	case 0x2039:
+	case 0x203A:
+	case 0x2E42:
+	case 0x300C:
+	case 0x300D:
+	case 0x300E:
+	case 0x300F:
+	case 0x301D:
+	case 0xFE41:
+	case 0xFE42:
+	case 0xFE43:
+	case 0xFE44:
+	case 0xFF02:
+	case 0xFF07:
+	case 0xFF62:
+	case 0xFF63:
+		return 1;
+	}
+	RANGE(0x201B, 0x201C);
+	RANGE(0x301E, 0x301F);
+	return 0;
+}
+
+/* return 1 if ch has Terminal Punctuation property */
+static int
+unicode_isTerminal_Punctuation(uint32_t ch)
+{
+	switch(ch) {
+	case 0x0021:
+	case 0x002C:
+	case 0x002E:
+	case 0x003F:
+	case 0x037E:
+	case 0x0387:
+	case 0x0589:
+	case 0x05C3:
+	case 0x060C:
+	case 0x061B:
+	case 0x061F:
+	case 0x06D4:
+	case 0x070C:
+	case 0x085E:
+	case 0x0F08:
+	case 0x17DA:
+	case 0x2E2E:
+	case 0x2E3C:
+	case 0x2E41:
+	case 0xA92F:
+	case 0xAADF:
+	case 0xABEB:
+	case 0xFF01:
+	case 0xFF0C:
+	case 0xFF0E:
+	case 0xFF1F:
+	case 0xFF61:
+	case 0xFF64:
+	case 0x1039F:
+	case 0x103D0:
+	case 0x10857:
+	case 0x1091F:
+	case 0x111CD:
+	case 0x112A9:
+	case 0x1145B:
+	case 0x11C71:
+	case 0x16AF5:
+	case 0x16B44:
+	case 0x1BC9F:
+		return 1;
+	}
+	RANGE(0x003A, 0x003B);
+	RANGE(0x0700, 0x070A);
+	RANGE(0x07F8, 0x07F9);
+	RANGE(0x0830, 0x083E);
+	RANGE(0x0964, 0x0965);
+	RANGE(0x0E5A, 0x0E5B);
+	RANGE(0x0F0D, 0x0F12);
+	RANGE(0x104A, 0x104B);
+	RANGE(0x1361, 0x1368);
+	RANGE(0x166D, 0x166E);
+	RANGE(0x16EB, 0x16ED);
+	RANGE(0x1735, 0x1736);
+	RANGE(0x17D4, 0x17D6);
+	RANGE(0x1802, 0x1805);
+	RANGE(0x1808, 0x1809);
+	RANGE(0x1944, 0x1945);
+	RANGE(0x1AA8, 0x1AAB);
+	RANGE(0x1B5A, 0x1B5B);
+	RANGE(0x1B5D, 0x1B5F);
+	RANGE(0x1C3B, 0x1C3F);
+	RANGE(0x1C7E, 0x1C7F);
+	RANGE(0x203C, 0x203D);
+	RANGE(0x2047, 0x2049);
+	RANGE(0x3001, 0x3002);
+	RANGE(0xA4FE, 0xA4FF);
+	RANGE(0xA60D, 0xA60F);
+	RANGE(0xA6F3, 0xA6F7);
+	RANGE(0xA876, 0xA877);
+	RANGE(0xA8CE, 0xA8CF);
+	RANGE(0xA9C7, 0xA9C9);
+	RANGE(0xAA5D, 0xAA5F);
+	RANGE(0xAAF0, 0xAAF1);
+	RANGE(0xFE50, 0xFE52);
+	RANGE(0xFE54, 0xFE57);
+	RANGE(0xFF1A, 0xFF1B);
+	RANGE(0x10A56, 0x10A57);
+	RANGE(0x10AF0, 0x10AF5);
+	RANGE(0x10B3A, 0x10B3F);
+	RANGE(0x10B99, 0x10B9C);
+	RANGE(0x11047, 0x1104D);
+	RANGE(0x110BE, 0x110C1);
+	RANGE(0x11141, 0x11143);
+	RANGE(0x111C5, 0x111C6);
+	RANGE(0x111DE, 0x111DF);
+	RANGE(0x11238, 0x1123C);
+	RANGE(0x1144B, 0x1144D);
+	RANGE(0x115C2, 0x115C5);
+	RANGE(0x115C9, 0x115D7);
+	RANGE(0x11641, 0x11642);
+	RANGE(0x1173C, 0x1173E);
+	RANGE(0x11A42, 0x11A43);
+	RANGE(0x11A9B, 0x11A9C);
+	RANGE(0x11AA1, 0x11AA2);
+	RANGE(0x11C41, 0x11C43);
+	RANGE(0x12470, 0x12474);
+	RANGE(0x16A6E, 0x16A6F);
+	RANGE(0x16B37, 0x16B39);
+	RANGE(0x1DA87, 0x1DA8A);
+	return 0;
+}
+
+/* return 1 if ch has Hex Digit property */
+static int
+unicode_isHex_Digit(uint32_t ch)
+{
+	RANGE(0x0030, 0x0039);
+	RANGE(0x0041, 0x0046);
+	RANGE(0x0061, 0x0066);
+	RANGE(0xFF10, 0xFF19);
+	RANGE(0xFF21, 0xFF26);
+	RANGE(0xFF41, 0xFF46);
+	return 0;
+}
+
+/* return 1 if ch has Other Alphabetic property */
+static int
+unicode_isOther_Alphabetic(uint32_t ch)
+{
+	switch(ch) {
+	case 0x0345:
+	case 0x05BF:
+	case 0x05C7:
+	case 0x0670:
+	case 0x06ED:
+	case 0x0711:
+	case 0x0903:
+	case 0x093A:
+	case 0x093B:
+	case 0x0981:
+	case 0x09D7:
+	case 0x0A03:
+	case 0x0A51:
+	case 0x0A75:
+	case 0x0A83:
+	case 0x0AC9:
+	case 0x0B01:
+	case 0x0B3E:
+	case 0x0B3F:
+	case 0x0B40:
+	case 0x0B56:
+	case 0x0B57:
+	case 0x0B82:
+	case 0x0BC0:
+	case 0x0BD7:
+	case 0x0C00:
+	case 0x0C81:
+	case 0x0CBE:
+	case 0x0CBF:
+	case 0x0CC6:
+	case 0x0CCC:
+	case 0x0D57:
+	case 0x0DD6:
+	case 0x0E31:
+	case 0x0E4D:
+	case 0x0EB1:
+	case 0x0ECD:
+	case 0x0F7F:
+	case 0x1031:
+	case 0x1038:
+	case 0x1062:
+	case 0x1082:
+	case 0x109C:
+	case 0x109D:
+	case 0x135F:
+	case 0x17B6:
+	case 0x17C6:
+	case 0x18A9:
+	case 0x1932:
+	case 0x1A1B:
+	case 0x1A55:
+	case 0x1A56:
+	case 0x1A57:
+	case 0x1A61:
+	case 0x1A62:
+	case 0x1B04:
+	case 0x1B35:
+	case 0x1B3B:
+	case 0x1B3C:
+	case 0x1B42:
+	case 0x1B43:
+	case 0x1B82:
+	case 0x1BA1:
+	case 0x1BE7:
+	case 0x1BED:
+	case 0x1BEE:
+	case 0xA827:
+	case 0xA8C5:
+	case 0xA952:
+	case 0xA983:
+	case 0xA9BC:
+	case 0xAA43:
+	case 0xAA4C:
+	case 0xAA4D:
+	case 0xAAB0:
+	case 0xAABE:
+	case 0xAAEB:
+	case 0xAAF5:
+	case 0xABE5:
+	case 0xABE8:
+	case 0xFB1E:
+	case 0x11000:
+	case 0x11001:
+	case 0x11002:
+	case 0x11082:
+	case 0x1112C:
+	case 0x11182:
+	case 0x111BF:
+	case 0x11234:
+	case 0x11237:
+	case 0x1123E:
+	case 0x112DF:
+	case 0x11340:
+	case 0x11357:
+	case 0x11445:
+	case 0x114B9:
+	case 0x114BA:
+	case 0x114C1:
+	case 0x115BE:
+	case 0x1163D:
+	case 0x1163E:
+	case 0x11640:
+	case 0x116AB:
+	case 0x116AC:
+	case 0x116AD:
+	case 0x11726:
+	case 0x11A39:
+	case 0x11A97:
+	case 0x11C2F:
+	case 0x11C3E:
+	case 0x11CA9:
+	case 0x11CB1:
+	case 0x11CB4:
+	case 0x11D3A:
+	case 0x11D43:
+	case 0x11D47:
+	case 0x1BC9E:
+	case 0x1E947:
+		return 1;
+	}
+	RANGE(0x05B0, 0x05BD);
+	RANGE(0x05C1, 0x05C2);
+	RANGE(0x05C4, 0x05C5);
+	RANGE(0x0610, 0x061A);
+	RANGE(0x064B, 0x0657);
+	RANGE(0x0659, 0x065F);
+	RANGE(0x06D6, 0x06DC);
+	RANGE(0x06E1, 0x06E4);
+	RANGE(0x06E7, 0x06E8);
+	RANGE(0x0730, 0x073F);
+	RANGE(0x07A6, 0x07B0);
+	RANGE(0x0816, 0x0817);
+	RANGE(0x081B, 0x0823);
+	RANGE(0x0825, 0x0827);
+	RANGE(0x0829, 0x082C);
+	RANGE(0x08D4, 0x08DF);
+	RANGE(0x08E3, 0x08E9);
+	RANGE(0x08F0, 0x0902);
+	RANGE(0x093E, 0x0940);
+	RANGE(0x0941, 0x0948);
+	RANGE(0x0949, 0x094C);
+	RANGE(0x094E, 0x094F);
+	RANGE(0x0955, 0x0957);
+	RANGE(0x0962, 0x0963);
+	RANGE(0x0982, 0x0983);
+	RANGE(0x09BE, 0x09C0);
+	RANGE(0x09C1, 0x09C4);
+	RANGE(0x09C7, 0x09C8);
+	RANGE(0x09CB, 0x09CC);
+	RANGE(0x09E2, 0x09E3);
+	RANGE(0x0A01, 0x0A02);
+	RANGE(0x0A3E, 0x0A40);
+	RANGE(0x0A41, 0x0A42);
+	RANGE(0x0A47, 0x0A48);
+	RANGE(0x0A4B, 0x0A4C);
+	RANGE(0x0A70, 0x0A71);
+	RANGE(0x0A81, 0x0A82);
+	RANGE(0x0ABE, 0x0AC0);
+	RANGE(0x0AC1, 0x0AC5);
+	RANGE(0x0AC7, 0x0AC8);
+	RANGE(0x0ACB, 0x0ACC);
+	RANGE(0x0AE2, 0x0AE3);
+	RANGE(0x0AFA, 0x0AFC);
+	RANGE(0x0B02, 0x0B03);
+	RANGE(0x0B41, 0x0B44);
+	RANGE(0x0B47, 0x0B48);
+	RANGE(0x0B4B, 0x0B4C);
+	RANGE(0x0B62, 0x0B63);
+	RANGE(0x0BBE, 0x0BBF);
+	RANGE(0x0BC1, 0x0BC2);
+	RANGE(0x0BC6, 0x0BC8);
+	RANGE(0x0BCA, 0x0BCC);
+	RANGE(0x0C01, 0x0C03);
+	RANGE(0x0C3E, 0x0C40);
+	RANGE(0x0C41, 0x0C44);
+	RANGE(0x0C46, 0x0C48);
+	RANGE(0x0C4A, 0x0C4C);
+	RANGE(0x0C55, 0x0C56);
+	RANGE(0x0C62, 0x0C63);
+	RANGE(0x0C82, 0x0C83);
+	RANGE(0x0CC0, 0x0CC4);
+	RANGE(0x0CC7, 0x0CC8);
+	RANGE(0x0CCA, 0x0CCB);
+	RANGE(0x0CD5, 0x0CD6);
+	RANGE(0x0CE2, 0x0CE3);
+	RANGE(0x0D00, 0x0D01);
+	RANGE(0x0D02, 0x0D03);
+	RANGE(0x0D3E, 0x0D40);
+	RANGE(0x0D41, 0x0D44);
+	RANGE(0x0D46, 0x0D48);
+	RANGE(0x0D4A, 0x0D4C);
+	RANGE(0x0D62, 0x0D63);
+	RANGE(0x0D82, 0x0D83);
+	RANGE(0x0DCF, 0x0DD1);
+	RANGE(0x0DD2, 0x0DD4);
+	RANGE(0x0DD8, 0x0DDF);
+	RANGE(0x0DF2, 0x0DF3);
+	RANGE(0x0E34, 0x0E3A);
+	RANGE(0x0EB4, 0x0EB9);
+	RANGE(0x0EBB, 0x0EBC);
+	RANGE(0x0F71, 0x0F7E);
+	RANGE(0x0F80, 0x0F81);
+	RANGE(0x0F8D, 0x0F97);
+	RANGE(0x0F99, 0x0FBC);
+	RANGE(0x102B, 0x102C);
+	RANGE(0x102D, 0x1030);
+	RANGE(0x1032, 0x1036);
+	RANGE(0x103B, 0x103C);
+	RANGE(0x103D, 0x103E);
+	RANGE(0x1056, 0x1057);
+	RANGE(0x1058, 0x1059);
+	RANGE(0x105E, 0x1060);
+	RANGE(0x1067, 0x1068);
+	RANGE(0x1071, 0x1074);
+	RANGE(0x1083, 0x1084);
+	RANGE(0x1085, 0x1086);
+	RANGE(0x1712, 0x1713);
+	RANGE(0x1732, 0x1733);
+	RANGE(0x1752, 0x1753);
+	RANGE(0x1772, 0x1773);
+	RANGE(0x17B7, 0x17BD);
+	RANGE(0x17BE, 0x17C5);
+	RANGE(0x17C7, 0x17C8);
+	RANGE(0x1885, 0x1886);
+	RANGE(0x1920, 0x1922);
+	RANGE(0x1923, 0x1926);
+	RANGE(0x1927, 0x1928);
+	RANGE(0x1929, 0x192B);
+	RANGE(0x1930, 0x1931);
+	RANGE(0x1933, 0x1938);
+	RANGE(0x1A17, 0x1A18);
+	RANGE(0x1A19, 0x1A1A);
+	RANGE(0x1A58, 0x1A5E);
+	RANGE(0x1A63, 0x1A64);
+	RANGE(0x1A65, 0x1A6C);
+	RANGE(0x1A6D, 0x1A72);
+	RANGE(0x1A73, 0x1A74);
+	RANGE(0x1B00, 0x1B03);
+	RANGE(0x1B36, 0x1B3A);
+	RANGE(0x1B3D, 0x1B41);
+	RANGE(0x1B80, 0x1B81);
+	RANGE(0x1BA2, 0x1BA5);
+	RANGE(0x1BA6, 0x1BA7);
+	RANGE(0x1BA8, 0x1BA9);
+	RANGE(0x1BAC, 0x1BAD);
+	RANGE(0x1BE8, 0x1BE9);
+	RANGE(0x1BEA, 0x1BEC);
+	RANGE(0x1BEF, 0x1BF1);
+	RANGE(0x1C24, 0x1C2B);
+	RANGE(0x1C2C, 0x1C33);
+	RANGE(0x1C34, 0x1C35);
+	RANGE(0x1CF2, 0x1CF3);
+	RANGE(0x1DE7, 0x1DF4);
+	RANGE(0x24B6, 0x24E9);
+	RANGE(0x2DE0, 0x2DFF);
+	RANGE(0xA674, 0xA67B);
+	RANGE(0xA69E, 0xA69F);
+	RANGE(0xA823, 0xA824);
+	RANGE(0xA825, 0xA826);
+	RANGE(0xA880, 0xA881);
+	RANGE(0xA8B4, 0xA8C3);
+	RANGE(0xA926, 0xA92A);
+	RANGE(0xA947, 0xA951);
+	RANGE(0xA980, 0xA982);
+	RANGE(0xA9B4, 0xA9B5);
+	RANGE(0xA9B6, 0xA9B9);
+	RANGE(0xA9BA, 0xA9BB);
+	RANGE(0xA9BD, 0xA9BF);
+	RANGE(0xAA29, 0xAA2E);
+	RANGE(0xAA2F, 0xAA30);
+	RANGE(0xAA31, 0xAA32);
+	RANGE(0xAA33, 0xAA34);
+	RANGE(0xAA35, 0xAA36);
+	RANGE(0xAAB2, 0xAAB4);
+	RANGE(0xAAB7, 0xAAB8);
+	RANGE(0xAAEC, 0xAAED);
+	RANGE(0xAAEE, 0xAAEF);
+	RANGE(0xABE3, 0xABE4);
+	RANGE(0xABE6, 0xABE7);
+	RANGE(0xABE9, 0xABEA);
+	RANGE(0x10376, 0x1037A);
+	RANGE(0x10A01, 0x10A03);
+	RANGE(0x10A05, 0x10A06);
+	RANGE(0x10A0C, 0x10A0F);
+	RANGE(0x11038, 0x11045);
+	RANGE(0x110B0, 0x110B2);
+	RANGE(0x110B3, 0x110B6);
+	RANGE(0x110B7, 0x110B8);
+	RANGE(0x11100, 0x11102);
+	RANGE(0x11127, 0x1112B);
+	RANGE(0x1112D, 0x11132);
+	RANGE(0x11180, 0x11181);
+	RANGE(0x111B3, 0x111B5);
+	RANGE(0x111B6, 0x111BE);
+	RANGE(0x1122C, 0x1122E);
+	RANGE(0x1122F, 0x11231);
+	RANGE(0x11232, 0x11233);
+	RANGE(0x112E0, 0x112E2);
+	RANGE(0x112E3, 0x112E8);
+	RANGE(0x11300, 0x11301);
+	RANGE(0x11302, 0x11303);
+	RANGE(0x1133E, 0x1133F);
+	RANGE(0x11341, 0x11344);
+	RANGE(0x11347, 0x11348);
+	RANGE(0x1134B, 0x1134C);
+	RANGE(0x11362, 0x11363);
+	RANGE(0x11435, 0x11437);
+	RANGE(0x11438, 0x1143F);
+	RANGE(0x11440, 0x11441);
+	RANGE(0x11443, 0x11444);
+	RANGE(0x114B0, 0x114B2);
+	RANGE(0x114B3, 0x114B8);
+	RANGE(0x114BB, 0x114BE);
+	RANGE(0x114BF, 0x114C0);
+	RANGE(0x115AF, 0x115B1);
+	RANGE(0x115B2, 0x115B5);
+	RANGE(0x115B8, 0x115BB);
+	RANGE(0x115BC, 0x115BD);
+	RANGE(0x115DC, 0x115DD);
+	RANGE(0x11630, 0x11632);
+	RANGE(0x11633, 0x1163A);
+	RANGE(0x1163B, 0x1163C);
+	RANGE(0x116AE, 0x116AF);
+	RANGE(0x116B0, 0x116B5);
+	RANGE(0x1171D, 0x1171F);
+	RANGE(0x11720, 0x11721);
+	RANGE(0x11722, 0x11725);
+	RANGE(0x11727, 0x1172A);
+	RANGE(0x11A01, 0x11A06);
+	RANGE(0x11A07, 0x11A08);
+	RANGE(0x11A09, 0x11A0A);
+	RANGE(0x11A35, 0x11A38);
+	RANGE(0x11A3B, 0x11A3E);
+	RANGE(0x11A51, 0x11A56);
+	RANGE(0x11A57, 0x11A58);
+	RANGE(0x11A59, 0x11A5B);
+	RANGE(0x11A8A, 0x11A96);
+	RANGE(0x11C30, 0x11C36);
+	RANGE(0x11C38, 0x11C3D);
+	RANGE(0x11C92, 0x11CA7);
+	RANGE(0x11CAA, 0x11CB0);
+	RANGE(0x11CB2, 0x11CB3);
+	RANGE(0x11CB5, 0x11CB6);
+	RANGE(0x11D31, 0x11D36);
+	RANGE(0x11D3C, 0x11D3D);
+	RANGE(0x11D3F, 0x11D41);
+	RANGE(0x16B30, 0x16B36);
+	RANGE(0x16F51, 0x16F7E);
+	RANGE(0x1E000, 0x1E006);
+	RANGE(0x1E008, 0x1E018);
+	RANGE(0x1E01B, 0x1E021);
+	RANGE(0x1E023, 0x1E024);
+	RANGE(0x1E026, 0x1E02A);
+	RANGE(0x1F130, 0x1F149);
+	RANGE(0x1F150, 0x1F169);
+	RANGE(0x1F170, 0x1F189);
+	return 0;
+}
+
+/* return 1 if ch has Other Lowercase property */
+static int
+unicode_isOther_Lowercase(uint32_t ch)
+{
+	switch(ch) {
+	case 0x00AA:
+	case 0x00BA:
+	case 0x0345:
+	case 0x037A:
+	case 0x1D78:
+	case 0x2071:
+	case 0x207F:
+	case 0xA770:
+		return 1;
+	}
+	RANGE(0x02B0, 0x02B8);
+	RANGE(0x02C0, 0x02C1);
+	RANGE(0x02E0, 0x02E4);
+	RANGE(0x1D2C, 0x1D6A);
+	RANGE(0x1D9B, 0x1DBF);
+	RANGE(0x2090, 0x209C);
+	RANGE(0x2170, 0x217F);
+	RANGE(0x24D0, 0x24E9);
+	RANGE(0x2C7C, 0x2C7D);
+	RANGE(0xA69C, 0xA69D);
+	RANGE(0xA7F8, 0xA7F9);
+	RANGE(0xAB5C, 0xAB5F);
+	return 0;
+}
+
+/* return 1 if ch has Other Uppercase property */
+static int
+unicode_isOther_Uppercase(uint32_t ch)
+{
+	RANGE(0x2160, 0x216F);
+	RANGE(0x24B6, 0x24CF);
+	RANGE(0x1F130, 0x1F149);
+	RANGE(0x1F150, 0x1F169);
+	RANGE(0x1F170, 0x1F189);
+	return 0;
+}
+
+/* return 1 if ch has Pattern White Space property */
+static int
+unicode_isPattern_White_Space(uint32_t ch)
+{
+	switch(ch) {
+	case 0x0020:
+	case 0x0085:
+	case 0x2028:
+	case 0x2029:
+		return 1;
+	}
+	RANGE(0x0009, 0x000D);
+	RANGE(0x200E, 0x200F);
+	return 0;
+}
+
+static int
+unicode_isalnum(uint32_t ch)
+{
+	return unicode_isalpha(ch) || unicode_isdigit(ch);
+}
+
+static int
+unicode_isalpha(uint32_t ch)
+{
+	RANGE(0x0041, 0x005a);
+	RANGE(0x0061, 0x007a);
+	return unicode_isOther_Alphabetic(ch);
+}
+
+static int
+unicode_isblank(uint32_t ch)
+{
+	switch(ch) {
+	case 0x0009:
+		return 1;
+	}
+	return unicode_isWhite_Space(ch) || unicode_isPattern_White_Space(ch);
+}
+
+static int
+unicode_iscntrl(uint32_t ch)
+{
+	switch(ch) {
+	case 0x0000:
+	case 0x007f:
+		return 1;
+	}
+	RANGE(0x0001, 0x001f);
+	return 0;
+}
+
+static int
+unicode_isdigit(uint32_t ch)
+{
+	RANGE(0x0030, 0x0039);
+	RANGE(0xFF10, 0xFF19);
+	return 0;
+}
+
+static int
+unicode_isgraph(uint32_t ch)
+{
+	RANGE(0x0021, 0x007e);
+	return 0;
+}
+
+static int
+unicode_islower(uint32_t ch)
+{
+	RANGE(0x0061, 0x007a);
+	return unicode_isOther_Lowercase(ch);
+}
+
+static int
+unicode_isprint(uint32_t ch)
+{
+	RANGE(0x0020, 0x007e);
+	/* XXX */
+	return 0;
+}
+
+static int
+unicode_ispunct(uint32_t ch)
+{
+	return unicode_isTerminal_Punctuation(ch) ||
+		unicode_isQuotation_Mark(ch) ||
+		unicode_isHyphen(ch) ||
+		unicode_isDash(ch);
+}
+
+static int
+unicode_isspace(uint32_t ch)
+{
+	return unicode_isWhite_Space(ch);
+}
+
+static int
+unicode_isupper(uint32_t ch)
+{
+	RANGE(0x0041, 0x005a);
+	return unicode_isOther_Uppercase(ch);
+}
+
+static int
+unicode_isxdigit(uint32_t ch)
+{
+	return unicode_isHex_Digit(ch);
+}
+
+static int
+unicode_isident(uint32_t ch)
+{
+	return unicode_isalnum(ch) || ch == '_';
+}
+
+static uint32_t
+unicode_tolower(uint32_t ch)
+{
+	if (unicode_isupper(ch)) {
+		ch += 0x20;
+	}
+	return ch;
+}
+
+static uint32_t
+unicode_toupper(uint32_t ch)
+{
+	if (unicode_islower(ch)) {
+		ch -= 0x20;
+	}
+	return ch;
+}
+
+/* allocate space - used in regasub */
+static int
+growspace(char **buf, size_t *size, size_t cc, size_t incr)
+{
+	size_t	 newsize;
+	char	*newv;
+
+	if (*size < cc + incr) {
+		newsize = howmany(cc + incr, 64) * 64;
+		newv = realloc(*buf, newsize);
+		if (newv == NULL) {
+			return 0;
+		}
+		memset(&newv[*size], 0x0, newsize - *size);
+		*buf = newv;
+		*size = newsize;
+	}
+	return 1;
+}
+
+/***********************************************************/
+
+/* allocate a new structure and return it */
+agcre_regex_t *
+agcre_new(void)
+{
+	return calloc(1, sizeof(agcre_regex_t));
+}
+
+/* compile regular expression */
+int
+agcre_regcomp(agcre_regex_t *agcre, const void *vs, uint32_t flags)
+{
+	retoken_t	*tok;
+	uint32_t	 n;
+	input_t		 in;
+	re_t		*re;
+	int		 errc;
+
+	if (agcre == NULL || vs == NULL) {
+		return AGCRE_REG_FAILURE;
+	}
+	memset(&in, 0x0, sizeof(in));
+	in.s = (const char *)vs;
+	in.eo = (flags & AGCRE_REG_PEND) ?
+		(agcre_regoff_t)(agcre->re_endp - in.s) :
+		(agcre_regoff_t)strlen(in.s);
+	memset(agcre, 0x0, sizeof(*agcre));
+	agcre->re_g = re = in.re = calloc(1, sizeof(*re));
+	if (in.eo - in.so > AGCRE_MAX_EXPR_LENGTH) {
+		re->msgc = snprintf(re->msg, sizeof(re->msg),
+			"expression length %llu larger than %u",
+			(unsigned long long)(in.eo - in.so),
+			AGCRE_MAX_EXPR_LENGTH);
+		return AGCRE_REG_FAILURE;
+	}
+	if (flags & AGCRE_REG_EXTENDED) {
+		tok = parse_extended(agcre, &in);
+	} else if (flags & AGCRE_REG_BASIC) {
+		tok = parse_basic(agcre, &in, flags);
+	} else if (flags & AGCRE_REG_NOSPEC) {
+		tok = parse_nospec(agcre, &in);
+	} else {
+		re->msgc = snprintf(re->msg, sizeof(re->msg),
+			"unknown type of regexp");
+		return AGCRE_REG_FAILURE;
+	}
+	if (tok == NULL) {
+		return AGCRE_REG_FAILURE;
+	}
+	if (flags & AGCRE_REG_DUMP) {
+		print_token(tok);
+		printf("\n");
+	}
+	errc = 0;
+	n = count(tok, &errc) + 1;
+	if (errc) {
+		re->msgc = snprintf(re->msg, sizeof(re->msg), "illegal token parse");
+		return AGCRE_REG_FAILURE;
+	}
+	if ((re->pc = re->prog = calloc(1, n * sizeof(re->prog[0]))) == NULL) {
+		re->msgc = snprintf(re->msg, sizeof(re->msg), "%d instruction allocation failure", n);
+		return AGCRE_REG_FAILURE;
+	}
+	re->flags = flags;
+	emit(re, tok);
+	freetok(tok);
+	re->pc->op = OP_MATCH;
+	re->pc += 1;
+	re->instrc = re->pc - re->prog;
+	agcre->re_magic = AGCRE_MAGIC;
+	if (flags & AGCRE_REG_DUMP) {
+		printprog(re);
+	}
+	return AGCRE_REG_SUCCESS;
+}
+
+#ifndef USE_ARG
+#define USE_ARG(x)  /*LINTED*/(void)&x
+#endif
+
+/* format the error nicely */
+size_t
+agcre_regerror(int errcode, const agcre_regex_t *agcre, char *errbuf, size_t size)
+{
+	re_t	*re;
+
+	USE_ARG(errcode);
+	if (agcre == NULL || size == 0 || errbuf == NULL) {
+		return 0;
+	}
+	re = agcre->re_g;
+	return snprintf(errbuf, size, "%s", re->msg);
+}
+
+/* execute the regular expression */
+int
+agcre_regexec(agcre_regex_t *agcre, const void *vs, size_t matchc, agcre_regmatch_t *m, uint32_t flags)
+{
+	threadlist_t	*current;
+	threadlist_t	*next;
+	threadlist_t	*tmp;
+	context_t	*matched;
+	context_t	*ctx;
+	uint32_t	 prevflags;
+	uint32_t	 instrc;
+	input_t		 in;
+	instr_t		*instr;
+	instr_t		*prog;
+	size_t		 i;
+	re_t		*re;
+	int		 ret;
+	
+	if (agcre == NULL || vs == NULL || (matchc > 0 && m == NULL)) {
+		return AGCRE_REG_FAILURE;
+	}
+	if ((re = agcre->re_g) == NULL) {
+		return AGCRE_REG_FAILURE;
+	}
+	if (agcre->re_magic != AGCRE_MAGIC) {
+		re->msgc = snprintf(re->msg, sizeof(re->msg),
+			"bad magic number 0x%x, not 0x%x", agcre->re_magic, AGCRE_MAGIC);
+		return AGCRE_REG_FAILURE;
+	}
+	if (matchc > AGCRE_MAX_SUBEXPR) {
+		re->msgc = snprintf(re->msg, sizeof(re->msg),
+			"matchc %zu larger than limit %u", matchc, AGCRE_MAX_SUBEXPR);
+		return AGCRE_REG_FAILURE;
+	}
+	memset(&in, 0x0, sizeof(in));
+	in.s = (const char *)vs;
+	in.so = (flags & AGCRE_REG_STARTEND) ? m[0].rm_so : 0;
+	in.eo = (flags & AGCRE_REG_STARTEND) ? m[0].rm_eo : get_length_in_bytes(vs, flags);
+	matched = NULL;	
+	ctx = make_new_context(re, matchc);
+	for (i = 0; i < matchc ; i++) {
+		m[i].rm_so = m[i].rm_eo = -1;
+		ctx->sub[i].rm_so = ctx->sub[i].rm_eo = -1;
+	}
+	instrc = re->instrc;
+	prog = re->prog;
+	if ((flags | re->flags) & AGCRE_REG_ANCHOR) {
+		instrc -= LEADING_DOT_STAR_INSTRS;
+		prog += LEADING_DOT_STAR_INSTRS;
+	}
+	current = threadlist(instrc);
+	next = threadlist(instrc);
+	re->gen += 1;
+	prevflags = re->flags;
+	re->flags |= flags;
+	in.c = in.so;
+	addthread(re, current, newthread(prog, ctx), &in, 0, 1);
+	ret = AGCRE_REG_FAILURE;
+	for (in.ch = BOL ;  ; in.c += in.bytes) {
+		if (current->nthreads == 0) {
+			break;
+		}
+		if (!get_next_char(&in, flags)) {
+			break;
+		}
+		if (re->flags & AGCRE_REG_TRACE) {
+			printf("\n%lld 0x%0x", (long long)in.c, in.ch);
+		}
+		for (re->gen++, i = 0; i < current->nthreads ; i++) {
+			instr = current->t[i].pc;
+			ctx = current->t[i].ctx;
+			if (re->flags & AGCRE_REG_TRACE) {
+				printf("\nthread %zu, pc %d, ctx %p", i, (int)(instr - prog), ctx);
+			}
+			switch(instr->op) {
+			case OP_CHAR:
+				if (!charmatch(re, instr, in.ch, instr->n)) {
+					decref(re, ctx);
+					break;
+				}
+				goto eol_check;
+			case OP_SET:
+				if (!setmatch(re, &re->sets[instr->n], &in, instr)) {
+					decref(re, ctx);
+					break;
+				}
+				goto eol_check;
+			case OP_ANY:
+eol_check:
+				if (!anymatch(re, &in)) {
+					decref(re, ctx);
+					break;
+				}
+				addthread(re, next, newthread(instr + 1, ctx), &in, in.bytes, 1);
+				break;
+			case OP_BACKREF:
+				if (!charmatch(re, instr, in.ch, in.s[ctx->sub[instr->n].rm_so + ctx->c])) {
+					decref(re, ctx);
+					break;
+				}
+				if (ctx->c == ctx->sub[instr->n].rm_eo - ctx->sub[instr->n].rm_so - in.bytes) {
+					addthread(re, next, newthread(instr + 1, ctx), &in, 0, 1);
+				} else {
+					ctx->c += in.bytes;
+					addthread(re, next, newthread(instr, ctx), &in, ctx->c, 1);
+				}
+				continue;
+			case OP_ZEROWIDTH:
+				if (!zeromatch(re, instr->n, &in)) {
+					decref(re, ctx);
+					break;
+				}
+				addthread(re, current, newthread(instr + 1, ctx), &in, 0, 0);
+				continue;
+			case OP_MATCH:
+				if (is_better_match(ctx, matched)) {
+					if (!matched) {
+						matched = make_new_context(re, matchc);
+					}
+					memcpy(matched, ctx, context_bytes(matchc));
+				}
+				for (i++; i < current->nthreads ; i++) {
+					decref(re, current->t[i].ctx);
+				}
+				if (ctx->sub[0].rm_eo - ctx->sub[0].rm_so == 0) {
+					addthread(re, next, newthread(prog, ctx), &in, in.bytes, 1);
+				}
+				goto break_for;
+			/* OP_JUMP, OP_SPLIT, OP_SAVE handled in addthread, so that */
+			/* machine execution matches what a backtracker would do. */
+			/* This is discussed (but not shown as code) in */
+			/* Regular Expression Matching: the Virtual Machine Approach. */
+			}
+		}
+break_for:
+		tmp = current;
+		current = next;
+		next = tmp;
+		next->nthreads = 0;
+		if (in.c > in.eo) {
+			break;
+		}
+#if 0
+		if ((flags & AGCRE_REG_ANCHOR) &&
+		    ((matched == NULL && current->nthreads < 2 && in.c > in.so) ||
+		     (matched && matched->sub[0].rm_so > (int64_t)in.so))) {
+			break;
+		}
+#endif
+	}
+	if (matched) {
+		if (!(flags & AGCRE_REG_NOSUB)) {
+			for (i = 0; i < matchc; i++) {
+				m[i] = matched->sub[i];
+			}
+		}
+		decref(re, matched);
+		ret = AGCRE_REG_SUCCESS;
+	}
+	free(current);
+	free(next);
+	decref(re, ctx);
+	re->flags = prevflags;
+	return ret;
+}
+
+/* execute the regular expression backwards */
+int
+agcre_rev_regexec(agcre_regex_t *agcre, const void *vs, size_t matchc, agcre_regmatch_t *m, uint32_t flags)
+{
+	int64_t	from;
+	int64_t	to;
+	int64_t	i;
+	int	found;
+	int	this;
+
+	if (flags & AGCRE_REG_STARTEND) {
+		from = m[0].rm_so;
+		to = m[0].rm_eo;
+	} else {
+		from = 0;
+		to = strlen(vs);
+	}
+	flags |= (AGCRE_REG_STARTEND | AGCRE_REG_ANCHOR);
+	for (found = 0, i = to - 1 ; i >= from ; --i) {
+		m[0].rm_so = i;
+		m[0].rm_eo = to;
+		if ((this = agcre_regexec(agcre, vs, matchc, m, flags)) == 0) {
+			found += 1;
+		} else if (found > 0) {
+			m[0].rm_so = i + 1;
+			m[0].rm_eo = to;
+			return agcre_regexec(agcre, vs, matchc, m, flags);
+		}
+	}
+	return AGCRE_REG_FAILURE;
+}
+
+/* free the regular expression */
+void
+agcre_regfree(agcre_regex_t *agcre)
+{
+	uint32_t	 i;
+	re_t		*re;
+
+	if (agcre) {
+		if ((re = agcre->re_g) != NULL) {
+			free(re->prog);
+			for (i = 0 ; i < re->setc ; i++) {
+				set_free(&re->sets[i]);
+			}
+			free(re->sets);
+			free_contexts(re->ctxlist);
+			free(re);
+		}
+	}
+}
+
+/* do regexp sed-style substitution */
+ssize_t
+agcre_regnsub(char *buf, size_t size, const char *repl, const agcre_regmatch_t *rm, const char *str)
+{
+	const char	*i;
+	uint32_t	 len;
+	uint32_t	 num;
+	char		*o;
+	int		 outofroom = 0;
+
+	if (buf == NULL || repl == NULL || rm == NULL || str == NULL || size == 0) {
+		return -1;
+	}
+	memset(buf, 0x0, size);
+	for (i = repl, o = buf ; *i ; ) {
+		if ((size_t)(o - buf) >= size - 1) {
+			/* not enough space, transition to observe */
+			outofroom = 1;
+		}
+		if (*i == '&' ||
+		    (*i == '\\' && (i[1] >= '0' && i[1] <= '9'))) {
+			num = (i[0] == '&') ? 0 : (i[1] - '0');
+			if (rm[num].rm_so < 0 || rm[num].rm_eo < 0 ||
+			    rm[num].rm_eo - rm[num].rm_so < 0 ||
+			    rm[num].rm_eo < rm[num].rm_so ||
+			    rm[num].rm_eo > rm[0].rm_eo) {
+				/* sanity check on regmatch values */
+				return -1;
+			}
+			len = rm[num].rm_eo - rm[num].rm_so;
+			if (len >= (size - (size_t)(o - buf))) {
+				/* not enough space, transition to observe */
+				outofroom = 1;
+			}
+			if (!outofroom) {
+				memcpy(o, &str[rm[num].rm_so], len);
+			}
+			i += 2;
+			o += len;
+		} else {
+			if (!outofroom) {
+				*o = *i;
+			}
+			i += 1;
+			o += 1;
+		}
+	}
+	if (!outofroom) {
+		*o = 0x0;
+	}
+	return (ssize_t)(o - buf);
+}
+
+/* do regexp sed-style substitution */
+ssize_t
+agcre_regasub(char **buf, const char *repl, const agcre_regmatch_t *rm, const char *str)
+{
+	const char	*i;
+	uint32_t	 len;
+	uint32_t	 num;
+	size_t		 size;
+	size_t		 cc;
+
+	if (buf == NULL || repl == NULL || rm == NULL || str == NULL) {
+		return -1;
+	}
+	*buf = NULL;
+	for (i = repl, size = cc = 0 ; *i ; ) {
+		if (*i == '&' ||
+		    (*i == '\\' && (i[1] >= '0' && i[1] <= '9'))) {
+			num = (i[0] == '&') ? 0 : (i[1] - '0');
+			if (rm[num].rm_so < 0 || rm[num].rm_eo < 0 ||
+			    rm[num].rm_eo - rm[num].rm_so < 0 ||
+			    rm[num].rm_eo < rm[num].rm_so ||
+			    rm[num].rm_eo > rm[0].rm_eo) {
+				/* sanity check on regmatch values */
+				return -1;
+			}
+			len = rm[num].rm_eo - rm[num].rm_so;
+			if (!growspace(buf, &size, cc, len)) {
+				return -1;
+			}
+			memcpy(&(*buf)[cc], &str[rm[num].rm_so], len);
+			i += 2;
+			cc += len;
+		} else {
+			if (!growspace(buf, &size, cc, 1)) {
+				return -1;
+			}
+			(*buf)[cc++] = *i++;
+		}
+	}
+	(*buf)[cc] = 0x0;
+	return cc;
+}

Reply via email to