Module Name:    src
Committed By:   christos
Date:           Sat Oct  5 20:23:55 UTC 2019

Modified Files:
        src/usr.bin/sed: compile.c

Log Message:
Recognize \oOOO \dDD \xXX plus the other regular 'C' backslash escapes like
gnu sed does, except when inside regex []. (Gnu sed translates those too,
unless --posix is specified).


To generate a diff of this commit:
cvs rdiff -u -r1.47 -r1.48 src/usr.bin/sed/compile.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/sed/compile.c
diff -u src/usr.bin/sed/compile.c:1.47 src/usr.bin/sed/compile.c:1.48
--- src/usr.bin/sed/compile.c:1.47	Mon Apr  4 20:13:03 2016
+++ src/usr.bin/sed/compile.c	Sat Oct  5 16:23:55 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: compile.c,v 1.47 2016/04/05 00:13:03 christos Exp $	*/
+/*	$NetBSD: compile.c,v 1.48 2019/10/05 20:23:55 christos Exp $	*/
 
 /*-
  * Copyright (c) 1992 Diomidis Spinellis.
@@ -38,7 +38,7 @@
 #endif
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: compile.c,v 1.47 2016/04/05 00:13:03 christos Exp $");
+__RCSID("$NetBSD: compile.c,v 1.48 2019/10/05 20:23:55 christos Exp $");
 #ifdef __FBSDID
 __FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $");
 #endif
@@ -89,6 +89,7 @@ static struct s_command
 		 *findlabel(char *);
 static void	  fixuplabel(struct s_command *, struct s_command *);
 static void	  uselabel(void);
+static void	  parse_escapes(char *);
 
 /*
  * Command specification.  This is used to drive the command parser.
@@ -463,6 +464,7 @@ compile_re(char *re, int case_insensitiv
 	if (case_insensitive)
 		flags |= REG_ICASE;
 	rep = xmalloc(sizeof(regex_t));
+	parse_escapes(re);
 	if ((eval = regcomp(rep, re, flags)) != 0)
 		errx(1, "%lu: %s: RE error: %s",
 				linenum, fname, strregerror(eval, rep));
@@ -471,6 +473,134 @@ compile_re(char *re, int case_insensitiv
 	return (rep);
 }
 
+static char
+cton(char c, int base)
+{
+	switch (c) {
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7':
+		return (char)(c - '0');
+	case '8': case '9':
+		return base == 8 ? '?' : (char)(c - '0');
+	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+		return base == 16 ? (char)(c - 'a' + 10) : '?'; 
+	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+		return base == 16 ? (char)(c - 'A' + 10) : '?'; 
+	default:
+		return '?';
+	}
+}
+
+static int
+ston(char **pp, char *sp, int base)
+{
+	char *p = *pp, n;
+	int r = cton(p[1], base);
+
+	if (r == '?')
+		return 0;
+
+	p++;
+	while ((n = cton(p[1], base)) != '?' && r < 255) {
+		r = r * base + n;
+		p++;
+	}
+	*sp = (char)r;
+	*pp = p;
+	return 1;
+}
+		
+static int
+unescape(char **pp, char **spp)
+{
+	char *p = *pp;
+	char *sp = *spp;
+
+	switch (*p) {
+	case 'o':
+		if (!ston(&p, sp, 8))
+			return 0;
+		break;
+	case 'd':
+		if (!ston(&p, sp, 10))
+			return 0;
+		break;
+	case 'x':
+		if (!ston(&p, sp, 16))
+			return 0;
+		break;
+	case 'a':
+		*sp = '\a';
+		p++;
+		break;
+#if 0
+	// No, \b RE
+	case 'b':
+		*sp = '\b';
+		break;
+#endif
+	case 'f':
+		*sp = '\f';
+		break;
+	case 'n':
+		*sp = '\n';
+		break;
+	case 'r':
+		*sp = '\r';
+		break;
+	case 'v':
+		*sp = '\v';
+		break;
+	default:
+		return 0;
+	}
+	*spp = sp + 1;
+	*pp = p;
+	return 1;
+}
+
+static void
+parse_escapes(char *buf)
+{
+	char bracket = '\0';
+	char *p, *q;
+
+	p = q = buf;
+
+	for (p = q = buf; *p; p++) {
+		if (*p == '\\' && p[1] && !bracket) {
+			p++;
+			if (unescape(&p, &q))
+				continue;
+			*q++ = '\\';
+		}
+		switch (*p) {
+		case '[':
+			if (!bracket)
+				bracket = *p;
+			break;
+		case '.':
+		case ':':
+		case '=':
+			if (bracket == '[' && p[-1] == '[')
+				bracket = *p;
+			break;
+		case ']':
+			if (!bracket)
+			    break;
+			if (bracket == '[')
+				bracket = '\0';
+			else if (p[-2] != bracket && p[-1] == bracket)
+				bracket = '[';
+			break;
+		default:
+			break;
+		}
+		*q++ = *p;
+	}
+	*q = '\0';
+}
+
 /*
  * Compile the substitution string of a regular expression and set res to
  * point to a saved copy of it.  Nsub is the number of parenthesized regular
@@ -508,7 +638,8 @@ compile_subst(char *p, struct s_subst *s
 				else
 					p++;
 
-				if (*p == '\0') {
+				switch (*p) {
+				case '\0':
 					/*
 					 * This escaped character is continued
 					 * in the next part of the line.  Note
@@ -519,7 +650,9 @@ compile_subst(char *p, struct s_subst *s
 					sawesc = 1;
 					p--;
 					continue;
-				} else if (strchr("123456789", *p) != NULL) {
+				case '0': case '1': case '2': case '3':
+				case '4': case '5': case '6': case '7':
+				case '8': case '9':
 					*sp++ = '\\';
 					ref = (u_char)(*p - '0');
 					if (s->re != NULL &&
@@ -528,8 +661,16 @@ compile_subst(char *p, struct s_subst *s
 								linenum, fname, *p);
 					if (s->maxbref < ref)
 						s->maxbref = ref;
-				} else if (*p == '&' || *p == '\\')
+					break;
+				case '&':
+				case '\\':
 					*sp++ = '\\';
+					break;
+				default:
+					if (unescape(&p, &sp))
+						continue;
+					break;
+				}
 			} else if (*p == c) {
 				if (*++p == '\0' && more) {
 					if (cu_fgets(lbuf, sizeof(lbuf), &more))

Reply via email to