CVS commit: [tls-earlyentropy] src/usr.bin/sed

Thor Lancelot Simon Sun, 10 Aug 2014 00:44:20 -0700

Module Name:    src
Committed By:   tls
Date:           Sun Aug 10 06:58:55 UTC 2014


Modified Files:
        src/usr.bin/sed [tls-earlyentropy]: Makefile POSIX compile.c defs.h
            extern.h main.c misc.c process.c sed.1

Log Message:
Rebase.


To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.14.22.1 src/usr.bin/sed/Makefile
cvs rdiff -u -r1.4 -r1.4.88.1 src/usr.bin/sed/POSIX
cvs rdiff -u -r1.39 -r1.39.4.1 src/usr.bin/sed/compile.c
cvs rdiff -u -r1.10 -r1.10.22.1 src/usr.bin/sed/defs.h
cvs rdiff -u -r1.11 -r1.11.22.1 src/usr.bin/sed/extern.h \
    src/usr.bin/sed/misc.c
cvs rdiff -u -r1.21 -r1.21.22.1 src/usr.bin/sed/main.c
cvs rdiff -u -r1.39 -r1.39.6.1 src/usr.bin/sed/process.c
cvs rdiff -u -r1.32 -r1.32.4.1 src/usr.bin/sed/sed.1

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/sed/Makefile
diff -u src/usr.bin/sed/Makefile:1.14 src/usr.bin/sed/Makefile:1.14.22.1
--- src/usr.bin/sed/Makefile:1.14	Tue Apr 14 22:15:26 2009
+++ src/usr.bin/sed/Makefile	Sun Aug 10 06:58:55 2014
@@ -1,7 +1,8 @@
-#	$NetBSD: Makefile,v 1.14 2009/04/14 22:15:26 lukem Exp $
+#	$NetBSD: Makefile,v 1.14.22.1 2014/08/10 06:58:55 tls Exp $
 #	from: @(#)Makefile	8.1 (Berkeley) 6/6/93
 
 .include <bsd.own.mk>
+WARNS=6
 
 PROG=	sed
 SRCS=	compile.c main.c misc.c process.c

Index: src/usr.bin/sed/POSIX
diff -u src/usr.bin/sed/POSIX:1.4 src/usr.bin/sed/POSIX:1.4.88.1
--- src/usr.bin/sed/POSIX:1.4	Thu Jan  9 20:21:25 1997
+++ src/usr.bin/sed/POSIX	Sun Aug 10 06:58:55 2014
@@ -1,5 +1,6 @@
-#	$NetBSD: POSIX,v 1.4 1997/01/09 20:21:25 tls Exp $
-#	from: @(#)POSIX	8.1 (Berkeley) 6/6/93
+# $NetBSD: POSIX,v 1.4.88.1 2014/08/10 06:58:55 tls Exp $
+#	@(#)POSIX	8.1 (Berkeley) 6/6/93
+# $FreeBSD: head/usr.bin/sed/POSIX 168417 2007-04-06 08:43:30Z yar $
 
 Comments on the IEEE P1003.2 Draft 12
      Part 2: Shell and Utilities
@@ -118,10 +119,15 @@ All uses of "POSIX" refer to section 4.5
 	1,3c\
 		text
 
-	Historic implementations, and this implementation, do not output
-	the text in the above example.  The general rule, therefore,
-	is that a range whose second address is never matched extends to
-	the end of the input.
+	Historic implementations did not output the text in the above
+	example.  Therefore it was believed that a range whose second
+	address was never matched extended to the end of the input.
+	However, the current practice adopted by this implementation,
+	as well as by those from GNU and SUN, is as follows:  The text
+	from the 'c' command still isn't output because the second address
+	isn't actually matched; but the range is reset after all if its
+	second address is a line number.  In the above example, only the
+	first line of the input will be deleted.
 
 13.	Historical implementations allow an output suppressing #n at the
 	beginning of -e arguments as well as in a script file.  POSIX

Index: src/usr.bin/sed/compile.c
diff -u src/usr.bin/sed/compile.c:1.39 src/usr.bin/sed/compile.c:1.39.4.1
--- src/usr.bin/sed/compile.c:1.39	Fri Jun 28 15:04:35 2013
+++ src/usr.bin/sed/compile.c	Sun Aug 10 06:58:55 2014
@@ -1,6 +1,7 @@
-/*	$NetBSD: compile.c,v 1.39 2013/06/28 15:04:35 joerg Exp $	*/
+/*	$NetBSD: compile.c,v 1.39.4.1 2014/08/10 06:58:55 tls Exp $	*/
 
 /*-
+ * Copyright (c) 1992 Diomidis Spinellis.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -32,58 +33,25 @@
  * SUCH DAMAGE.
  */
 
-/*-
- * Copyright (c) 1992 Diomidis Spinellis.
- *
- * This code is derived from software contributed to Berkeley by
- * Diomidis Spinellis of Imperial College, University of London.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
 #if HAVE_NBTOOL_CONFIG_H
 #include "nbtool_config.h"
 #endif
 
 #include <sys/cdefs.h>
-#ifndef lint
+__RCSID("$NetBSD: compile.c,v 1.39.4.1 2014/08/10 06:58:55 tls Exp $");
+#ifdef __FBSDID
+__FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $");
+#endif
+
 #if 0
-static char sccsid[] = "@(#)compile.c	8.2 (Berkeley) 4/28/95";
-#else
-__RCSID("$NetBSD: compile.c,v 1.39 2013/06/28 15:04:35 joerg Exp $");
+static const char sccsid[] = "@(#)compile.c	8.1 (Berkeley) 6/6/93";
 #endif
-#endif /* not lint */
 
 #include <sys/types.h>
 #include <sys/stat.h>
 
 #include <ctype.h>
+#include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
@@ -91,14 +59,11 @@ __RCSID("$NetBSD: compile.c,v 1.39 2013/
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
 
 #include "defs.h"
 #include "extern.h"
 
-#ifndef _POSIX2_LINE_MAX
-#define _POSIX2_LINE_MAX (2 * BUFSIZ)
-#endif
-
 #define LHSZ	128
 #define	LHMASK	(LHSZ - 1)
 static struct labhash {
@@ -110,12 +75,12 @@ static struct labhash {
 
 static char	 *compile_addr(char *, struct s_addr *);
 static char	 *compile_ccl(char **, char *);
-static char	 *compile_delimited(char *, char *);
+static char	 *compile_delimited(char *, char *, int);
 static char	 *compile_flags(char *, struct s_subst *);
-static char	 *compile_re(char *, regex_t **);
+static regex_t	 *compile_re(char *, int);
 static char	 *compile_subst(char *, struct s_subst *);
 static char	 *compile_text(void);
-static char	 *compile_tr(char *, char **);
+static char	 *compile_tr(char *, struct s_tr **);
 static struct s_command
 		**compile_stream(struct s_command **);
 static char	 *duptoeol(char *, const char *);
@@ -184,39 +149,43 @@ compile(void)
 	match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
 }
 
-#define EATSPACE() 						\
-	while (*p && isascii((unsigned char)*p) &&		\
-	    isspace((unsigned char)*p))				\
-		p++						\
+#define EATSPACE() do {							\
+	if (p)								\
+		while (*p && isspace((unsigned char)*p))                \
+			p++;						\
+	} while (0)
 
 static struct s_command **
 compile_stream(struct s_command **link)
 {
 	char *p;
-	static char *lbuf;	/* To avoid excessive malloc calls */
-	static size_t bufsize;
+	static char lbuf[_POSIX2_LINE_MAX + 1];	/* To save stack */
 	struct s_command *cmd, *cmd2, *stack;
 	struct s_format *fp;
+	char re[_POSIX2_LINE_MAX + 1];
 	int naddr;				/* Number of addresses */
 
 	stack = 0;
 	for (;;) {
-		if ((p = cu_fgets(&lbuf, &bufsize)) == NULL) {
+		if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) {
 			if (stack != 0)
-				err(COMPILE, "unexpected EOF (pending }'s)");
+				errx(1, "%lu: %s: unexpected EOF (pending }'s)",
+							linenum, fname);
 			return (link);
 		}
 
 semicolon:	EATSPACE();
-		if (*p == '#' || *p == '\0')
-			continue;
-		else if (*p == ';') {
-			p++;
-			goto semicolon;
+		if (p) {
+			if (*p == '#' || *p == '\0')
+				continue;
+			else if (*p == ';') {
+				p++;
+				goto semicolon;
+			}
 		}
 		*link = cmd = xmalloc(sizeof(struct s_command));
 		link = &cmd->next;
-		cmd->nonsel = cmd->inrange = 0;
+		cmd->startline = cmd->nonsel = 0;
 		/* First parse the addresses */
 		naddr = 0;
 
@@ -241,16 +210,17 @@ semicolon:	EATSPACE();
 
 nonsel:		/* Now parse the command */
 		if (!*p)
-			err(COMPILE, "command expected");
+			errx(1, "%lu: %s: command expected", linenum, fname);
 		cmd->code = *p;
 		for (fp = cmd_fmts; fp->code; fp++)
 			if (fp->code == *p)
 				break;
 		if (!fp->code)
-			err(COMPILE, "invalid command code %c", *p);
+			errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p);
 		if (naddr > fp->naddr)
-			err(COMPILE,
-"command %c expects up to %d address(es), found %d", *p, fp->naddr, naddr);
+			errx(1,
+				"%lu: %s: command %c expects up to %d address(es), found %d",
+				linenum, fname, *p, fp->naddr, naddr);
 		switch (fp->args) {
 		case NONSEL:			/* ! */
 			p++;
@@ -273,7 +243,7 @@ nonsel:		/* Now parse the command */
 			 */
 			cmd->nonsel = 1;
 			if (stack == 0)
-				err(COMPILE, "unexpected }");
+				errx(1, "%lu: %s: unexpected }", linenum, fname);
 			cmd2 = stack;
 			stack = cmd2->next;
 			cmd2->next = cmd;
@@ -281,26 +251,32 @@ nonsel:		/* Now parse the command */
 		case EMPTY:		/* d D g G h H l n N p P q x = \0 */
 			p++;
 			EATSPACE();
-			if (*p == ';') {
+			switch (*p) {
+			case ';':
 				p++;
 				link = &cmd->next;
 				goto semicolon;
+			case '}':
+				goto semicolon;
+			case '\0':
+				break;
+			default:
+				errx(1, "%lu: %s: extra characters at the end of %c command",
+						linenum, fname, cmd->code);
 			}
-			if (*p)
-				err(COMPILE,
-"extra characters at the end of %c command", cmd->code);
 			break;
 		case TEXT:			/* a c i */
 			p++;
 			EATSPACE();
 			if (*p != '\\')
-				err(COMPILE,
-"command %c expects \\ followed by text", cmd->code);
+				errx(1,
+"%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code);
 			p++;
 			EATSPACE();
 			if (*p)
-				err(COMPILE,
-"extra characters after \\ at the end of %c command", cmd->code);
+				errx(1,
+				"%lu: %s: extra characters after \\ at the end of %c command",
+				linenum, fname, cmd->code);
 			cmd->t = compile_text();
 			break;
 		case COMMENT:			/* \0 # */
@@ -309,20 +285,20 @@ nonsel:		/* Now parse the command */
 			p++;
 			EATSPACE();
 			if (*p == '\0')
-				err(COMPILE, "filename expected");
+				errx(1, "%lu: %s: filename expected", linenum, fname);
 			cmd->t = duptoeol(p, "w command");
 			if (aflag)
 				cmd->u.fd = -1;
-			else if ((cmd->u.fd = open(p, 
+			else if ((cmd->u.fd = open(p,
 			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
 			    DEFFILEMODE)) == -1)
-				err(FATAL, "%s: %s", p, strerror(errno));
+				err(1, "%s", p);
 			break;
 		case RFILE:			/* r */
 			p++;
 			EATSPACE();
 			if (*p == '\0')
-				err(COMPILE, "filename expected");
+				errx(1, "%lu: %s: filename expected", linenum, fname);
 			else
 				cmd->t = duptoeol(p, "read command");
 			break;
@@ -339,21 +315,35 @@ nonsel:		/* Now parse the command */
 			EATSPACE();
 			cmd->t = duptoeol(p, "label");
 			if (strlen(p) == 0)
-				err(COMPILE, "empty label");
+				errx(1, "%lu: %s: empty label", linenum, fname);
 			enterlabel(cmd);
 			break;
 		case SUBST:			/* s */
 			p++;
 			if (*p == '\0' || *p == '\\')
-				err(COMPILE,
-"substitute pattern can not be delimited by newline or backslash");
-			cmd->u.s = xmalloc(sizeof(struct s_subst));
-			p = compile_re(p, &cmd->u.s->re);
+				errx(1,
+"%lu: %s: substitute pattern can not be delimited by newline or backslash",
+					linenum, fname);
+			cmd->u.s = xcalloc(1, sizeof(struct s_subst));
+			p = compile_delimited(p, re, 0);
 			if (p == NULL)
-				err(COMPILE, "unterminated substitute pattern");
+				errx(1,
+				"%lu: %s: unterminated substitute pattern", linenum, fname);
+
+			/* Compile RE with no case sensitivity temporarily */
+			if (*re == '\0')
+				cmd->u.s->re = NULL;
+			else
+				cmd->u.s->re = compile_re(re, 0);
 			--p;
 			p = compile_subst(p, cmd->u.s);
 			p = compile_flags(p, cmd->u.s);
+
+			/* Recompile RE with case sensitivity from "I" flag if any */
+			if (*re == '\0')
+				cmd->u.s->re = NULL;
+			else
+				cmd->u.s->re = compile_re(re, cmd->u.s->icase);
 			EATSPACE();
 			if (*p == ';') {
 				p++;
@@ -363,23 +353,29 @@ nonsel:		/* Now parse the command */
 			break;
 		case TR:			/* y */
 			p++;
-			p = compile_tr(p, (char **)(void *)&cmd->u.y);
+			p = compile_tr(p, &cmd->u.y);
 			EATSPACE();
-			if (*p == ';') {
+			switch (*p) {
+			case ';':
 				p++;
 				link = &cmd->next;
 				goto semicolon;
+			case '}':
+				goto semicolon;
+			case '\0':
+				break;
+			default:
+				errx(1,
+"%lu: %s: extra text at the end of a transform command", linenum, fname);
 			}
 			if (*p)
-				err(COMPILE,
-"extra text at the end of a transform command");
 			break;
 		}
 	}
 }
 
 /*
- * Get a delimited string.  P points to the delimiter of the string; d points
+ * Get a delimited string.  P points to the delimiter of the string; d points
  * to a buffer area.  Newline and delimiter escapes are processed; other
  * escapes are ignored.
  *
@@ -388,7 +384,7 @@ nonsel:		/* Now parse the command */
  * with the processed string.
  */
 static char *
-compile_delimited(char *p, char *d)
+compile_delimited(char *p, char *d, int is_tr)
 {
 	char c;
 
@@ -396,13 +392,15 @@ compile_delimited(char *p, char *d)
 	if (c == '\0')
 		return (NULL);
 	else if (c == '\\')
-		err(COMPILE, "\\ can not be used as a string delimiter");
+		errx(1, "%lu: %s: \\ can not be used as a string delimiter",
+				linenum, fname);
 	else if (c == '\n')
-		err(COMPILE, "newline can not be used as a string delimiter");
+		errx(1, "%lu: %s: newline can not be used as a string delimiter",
+				linenum, fname);
 	while (*p) {
-		if (*p == '[') {
+		if (*p == '[' && *p != c) {
 			if ((d = compile_ccl(&p, d)) == NULL)
-				err(COMPILE, "unbalanced brackets ([])");
+				errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname);
 			continue;
 		} else if (*p == '\\' && p[1] == '[') {
 			*d++ = *p++;
@@ -412,9 +410,12 @@ compile_delimited(char *p, char *d)
 			*d++ = '\n';
 			p += 2;
 			continue;
-		} else if (*p == '\\' && p[1] == '\\')
-			*d++ = *p++;
-		else if (*p == c) {
+		} else if (*p == '\\' && p[1] == '\\') {
+			if (is_tr)
+				p++;
+			else
+				*d++ = *p++;
+		} else if (*p == c) {
 			*d = '\0';
 			return (p + 1);
 		}
@@ -442,40 +443,32 @@ compile_ccl(char **sp, char *t)
 			for (c = *s; (*t = *s) != ']' || c != d; s++, t++)
 				if ((c = *s) == '\0')
 					return NULL;
-		} else if (*s == '\\' && s[1] == 'n')
-			    *t = '\n', s++;
+		}
 	return (*s == ']') ? *sp = ++s, ++t : NULL;
 }
 
 /*
- * Get a regular expression.  P points to the delimiter of the regular
- * expression; repp points to the address of a regexp pointer.  Newline
- * and delimiter escapes are processed; other escapes are ignored.
- * Returns a pointer to the first character after the final delimiter
- * or NULL in the case of a non terminated regular expression.  The regexp
- * pointer is set to the compiled regular expression.
+ * Compiles the regular expression in RE and returns a pointer to the compiled
+ * regular expression.
  * Cflags are passed to regcomp.
  */
-static char *
-compile_re(char *p, regex_t **repp)
+static regex_t *
+compile_re(char *re, int case_insensitive)
 {
-	int eval;
-	char *re;
+	regex_t *rep;
+	int eval, flags;
 
-	re = xmalloc(strlen(p) + 1); /* strlen(re) <= strlen(p) */
-	p = compile_delimited(p, re);
-	if (p && strlen(re) == 0) {
-		*repp = NULL;
-		free(re);
-		return (p);
-	}
-	*repp = xmalloc(sizeof(regex_t));
-	if (p && (eval = regcomp(*repp, re, ere)) != 0)
-		err(COMPILE, "RE error: %s", strregerror(eval, *repp));
-	if (maxnsub < (*repp)->re_nsub)
-		maxnsub = (*repp)->re_nsub;
-	free(re);
-	return (p);
+
+	flags = rflags;
+	if (case_insensitive)
+		flags |= REG_ICASE;
+	rep = xmalloc(sizeof(regex_t));
+	if ((eval = regcomp(rep, re, flags)) != 0)
+		errx(1, "%lu: %s: RE error: %s",
+				linenum, fname, strregerror(eval, rep));
+	if (maxnsub < rep->re_nsub)
+		maxnsub = rep->re_nsub;
+	return (rep);
 }
 
 /*
@@ -486,11 +479,11 @@ compile_re(char *p, regex_t **repp)
 static char *
 compile_subst(char *p, struct s_subst *s)
 {
-	static char *lbuf;
-	static size_t bufsize;
-	int asize, ref, size, len;
+	static char lbuf[_POSIX2_LINE_MAX + 1];
+	size_t asize, size;
+	u_char ref;
 	char c, *text, *op, *sp;
-	int sawesc = 0;
+	int more = 1, sawesc = 0;
 
 	c = *p++;			/* Terminator character */
 	if (c == '\0')
@@ -498,16 +491,10 @@ compile_subst(char *p, struct s_subst *s
 
 	s->maxbref = 0;
 	s->linenum = linenum;
-	text = NULL;
-	asize = size = 0;
+	asize = 2 * _POSIX2_LINE_MAX + 1;
+	text = xmalloc(asize);
+	size = 0;
 	do {
-		len = ROUNDLEN(strlen(p) + 1);
-		if (asize - size < len) {
-			do {
-				asize += len;
-			} while (asize - size < len);
-			text = xrealloc(text, asize);
-		}
 		op = sp = text + size;
 		for (; *p; p++) {
 			if (*p == '\\' || sawesc) {
@@ -534,33 +521,40 @@ compile_subst(char *p, struct s_subst *s
 					continue;
 				} else if (strchr("123456789", *p) != NULL) {
 					*sp++ = '\\';
-					ref = *p - '0';
+					ref = (u_char)(*p - '0');
 					if (s->re != NULL &&
-					    (size_t)ref > s->re->re_nsub)
-						err(COMPILE,
-"\\%c not defined in the RE", *p);
+					    ref > s->re->re_nsub)
+						errx(1, "%lu: %s: \\%c not defined in the RE",
+								linenum, fname, *p);
 					if (s->maxbref < ref)
 						s->maxbref = ref;
 				} else if (*p == '&' || *p == '\\')
 					*sp++ = '\\';
 			} else if (*p == c) {
-				p++;
+				if (*++p == '\0' && more) {
+					if (cu_fgets(lbuf, sizeof(lbuf), &more))
+						p = lbuf;
+				}
 				*sp++ = '\0';
-				size += sp - op;
+				size += (size_t)(sp - op);
 				s->new = xrealloc(text, size);
 				return (p);
 			} else if (*p == '\n') {
-				err(COMPILE,
-"unescaped newline inside substitute pattern");
+				errx(1,
+"%lu: %s: unescaped newline inside substitute pattern", linenum, fname);
 				/* NOTREACHED */
 			}
 			*sp++ = *p;
 		}
-		size += sp - op;
-	} while ((p = cu_fgets(&lbuf, &bufsize)));
-	err(COMPILE, "unterminated substitute in regular expression");
+		size += (size_t)(sp - op);
+		if (asize - size < _POSIX2_LINE_MAX + 1) {
+			asize *= 2;
+			text = xrealloc(text, asize);
+		}
+	} while (cu_fgets(p = lbuf, sizeof(lbuf), &more));
+	errx(1, "%lu: %s: unterminated substitute in regular expression",
+			linenum, fname);
 	/* NOTREACHED */
-	return (NULL);
 }
 
 /*
@@ -570,19 +564,21 @@ static char *
 compile_flags(char *p, struct s_subst *s)
 {
 	int gn;			/* True if we have seen g or n */
-	char wfile[PATH_MAX], *q;
+	unsigned long nval;
+	char wfile[_POSIX2_LINE_MAX + 1], *q;
 
 	s->n = 1;				/* Default */
 	s->p = 0;
 	s->wfile = NULL;
 	s->wfd = -1;
+	s->icase = 0;
 	for (gn = 0;;) {
 		EATSPACE();			/* EXTENSION */
 		switch (*p) {
 		case 'g':
 			if (gn)
-				err(COMPILE,
-"more than one number or 'g' in substitute flags");
+				errx(1,
+"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
 			gn = 1;
 			s->n = 0;
 			break;
@@ -593,22 +589,30 @@ compile_flags(char *p, struct s_subst *s
 		case 'p':
 			s->p = 1;
 			break;
+		case 'i':
+		case 'I':
+			s->icase = 1;
+			break;
 		case '1': case '2': case '3':
 		case '4': case '5': case '6':
 		case '7': case '8': case '9':
 			if (gn)
-				err(COMPILE,
-"more than one number or 'g' in substitute flags");
+				errx(1,
+"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
 			gn = 1;
-			/* XXX Check for overflow */
-			s->n = (int)strtol(p, &p, 10);
+			errno = 0;
+			nval = strtoul(p, &p, 10);
+			if (errno == ERANGE || nval > INT_MAX)
+				errx(1,
+"%lu: %s: overflow in the 'N' substitute flag", linenum, fname);
+			s->n = (int)nval;
 			p--;
 			break;
 		case 'w':
 			p++;
 #ifdef HISTORIC_PRACTICE
 			if (*p != ' ') {
-				err(WARNING, "space missing before w wfile");
+				warnx("%lu: %s: space missing before w wfile", linenum, fname);
 				return (p);
 			}
 #endif
@@ -621,16 +625,16 @@ compile_flags(char *p, struct s_subst *s
 			}
 			*q = '\0';
 			if (q == wfile)
-				err(COMPILE, "no wfile specified");
+				errx(1, "%lu: %s: no wfile specified", linenum, fname);
 			s->wfile = strdup(wfile);
 			if (!aflag && (s->wfd = open(wfile,
 			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
 			    DEFFILEMODE)) == -1)
-				err(FATAL, "%s: %s", wfile, strerror(errno));
+				err(1, "%s", wfile);
 			return (p);
 		default:
-			err(COMPILE,
-			    "bad flag in substitute command: '%c'", *p);
+			errx(1, "%lu: %s: bad flag in substitute command: '%c'",
+					linenum, fname, *p);
 			break;
 		}
 		p++;
@@ -641,82 +645,127 @@ compile_flags(char *p, struct s_subst *s
  * Compile a translation set of strings into a lookup table.
  */
 static char *
-compile_tr(char *p, char **transtab)
+compile_tr(char *p, struct s_tr **py)
 {
-	int i;
-	char *lt, *op, *np;
-	char *old = NULL, *new = NULL;
+	struct s_tr *y;
+	size_t i;
+	const char *op, *np;
+	char old[_POSIX2_LINE_MAX + 1];
+	char new[_POSIX2_LINE_MAX + 1];
+	size_t oclen, oldlen, nclen, newlen;
+	mbstate_t mbs1, mbs2;
+
+	*py = y = xmalloc(sizeof(*y));
+	y->multis = NULL;
+	y->nmultis = 0;
 
 	if (*p == '\0' || *p == '\\')
-		err(COMPILE,
-"transform pattern can not be delimited by newline or backslash");
-	old = xmalloc(strlen(p) + 1);
-	p = compile_delimited(p, old);
-	if (p == NULL) {
-		err(COMPILE, "unterminated transform source string");
-		goto bad;
-	}
-	new = xmalloc(strlen(p) + 1);
-	p = compile_delimited(p - 1, new);
-	if (p == NULL) {
-		err(COMPILE, "unterminated transform target string");
-		goto bad;
-	}
+		errx(1,
+	"%lu: %s: transform pattern can not be delimited by newline or backslash",
+			linenum, fname);
+	p = compile_delimited(p, old, 1);
+	if (p == NULL)
+		errx(1, "%lu: %s: unterminated transform source string",
+				linenum, fname);
+	p = compile_delimited(p - 1, new, 1);
+	if (p == NULL)
+		errx(1, "%lu: %s: unterminated transform target string",
+				linenum, fname);
 	EATSPACE();
-	if (strlen(new) != strlen(old)) {
-		err(COMPILE, "transform strings are not the same length");
-		goto bad;
+	op = old;
+	oldlen = mbsrtowcs(NULL, &op, 0, NULL);
+	if (oldlen == (size_t)-1)
+		err(1, NULL);
+	np = new;
+	newlen = mbsrtowcs(NULL, &np, 0, NULL);
+	if (newlen == (size_t)-1)
+		err(1, NULL);
+	if (newlen != oldlen)
+		errx(1, "%lu: %s: transform strings are not the same length",
+				linenum, fname);
+	if (MB_CUR_MAX == 1) {
+		/*
+		 * The single-byte encoding case is easy: generate a
+		 * lookup table.
+		 */
+		for (i = 0; i <= UCHAR_MAX; i++)
+			y->bytetab[i] = (u_char)i;
+		for (; *op; op++, np++)
+			y->bytetab[(u_char)*op] = (u_char)*np;
+	} else {
+		/*
+		 * Multi-byte encoding case: generate a lookup table as
+		 * above, but only for single-byte characters. The first
+		 * bytes of multi-byte characters have their lookup table
+		 * entries set to 0, which causes do_tr() to search through
+		 * an auxiliary vector of multi-byte mappings.
+		 */
+		memset(&mbs1, 0, sizeof(mbs1));
+		memset(&mbs2, 0, sizeof(mbs2));
+		for (i = 0; i <= UCHAR_MAX; i++)
+			y->bytetab[i] = (u_char)((btowc((int)i) != WEOF) ? i : 0);
+		while (*op != '\0') {
+			oclen = mbrlen(op, MB_LEN_MAX, &mbs1);
+			if (oclen == (size_t)-1 || oclen == (size_t)-2)
+				errc(1, EILSEQ, NULL);
+			nclen = mbrlen(np, MB_LEN_MAX, &mbs2);
+			if (nclen == (size_t)-1 || nclen == (size_t)-2)
+				errc(1, EILSEQ, NULL);
+			if (oclen == 1 && nclen == 1)
+				y->bytetab[(u_char)*op] = (u_char)*np;
+			else {
+				y->bytetab[(u_char)*op] = 0;
+				y->multis = xrealloc(y->multis,
+				    (y->nmultis + 1) * sizeof(*y->multis));
+				i = y->nmultis++;
+				y->multis[i].fromlen = oclen;
+				memcpy(y->multis[i].from, op, oclen);
+				y->multis[i].tolen = nclen;
+				memcpy(y->multis[i].to, np, nclen);
+			}
+			op += oclen;
+			np += nclen;
+		}
 	}
-	/* We assume characters are 8 bits */
-	lt = xmalloc(UCHAR_MAX+1);
-	for (i = 0; i <= UCHAR_MAX; i++)
-		lt[i] = (char)i;
-	for (op = old, np = new; *op; op++, np++)
-		lt[(u_char)*op] = *np;
-	*transtab = lt;
-	free(old);
-	free(new);
 	return (p);
-bad:
-	free(old);
-	free(new);
-	return (NULL);
 }
 
 /*
- * Compile the text following an a, c, or i command.
+ * Compile the text following an a or i command.
  */
 static char *
 compile_text(void)
 {
-	int asize, size, len;
-	char *lbuf, *text, *p, *op, *s;
-	size_t bufsize;
-
-	lbuf = text = NULL;
-	asize = size = 0;
-	while ((p = cu_fgets(&lbuf, &bufsize))) {
-		len = ROUNDLEN(strlen(p) + 1);
-		if (asize - size < len) {
-			do {
-				asize += len;
-			} while (asize - size < len);
-			text = xrealloc(text, asize);
-		}
+	size_t asize, size;
+	int esc_nl;
+	char *text, *p, *op, *s;
+	char lbuf[_POSIX2_LINE_MAX + 1];
+
+	asize = 2 * _POSIX2_LINE_MAX + 1;
+	text = xmalloc(asize);
+	size = 0;
+	while (cu_fgets(lbuf, sizeof(lbuf), NULL)) {
 		op = s = text + size;
-		for (; *p; p++) {
-			if (*p == '\\')
-				p++;
+		p = lbuf;
+		EATSPACE();
+		for (esc_nl = 0; *p != '\0'; p++) {
+			if (*p == '\\' && p[1] != '\0' && *++p == '\n')
+				esc_nl = 1;
 			*s++ = *p;
 		}
-		size += s - op;
-		if (p[-2] != '\\') {
+		size += (size_t)(s - op);
+		if (!esc_nl) {
 			*s = '\0';
 			break;
 		}
+		if (asize - size < _POSIX2_LINE_MAX + 1) {
+			asize *= 2;
+			text = xrealloc(text, asize);
+		}
 	}
-	free(lbuf);
-	return (xrealloc(text, size + 1));
+	text[size] = '\0';
+	p = xrealloc(text, size + 1);
+	return (p);
 }
 
 /*
@@ -726,30 +775,49 @@ compile_text(void)
 static char *
 compile_addr(char *p, struct s_addr *a)
 {
-	char *end;
+	char *end, re[_POSIX2_LINE_MAX + 1];
+	int icase;
 
+	icase = 0;
+
+	a->type = 0;
 	switch (*p) {
 	case '\\':				/* Context address */
 		++p;
 		/* FALLTHROUGH */
 	case '/':				/* Context address */
-		p = compile_re(p, &a->u.r);
+		p = compile_delimited(p, re, 0);
 		if (p == NULL)
-			err(COMPILE, "unterminated regular expression");
+			errx(1, "%lu: %s: unterminated regular expression", linenum, fname);
+		/* Check for case insensitive regexp flag */
+		if (*p == 'I') {
+			icase = 1;
+			p++;
+		}
+		if (*re == '\0')
+			a->u.r = NULL;
+		else
+			a->u.r = compile_re(re, icase);
 		a->type = AT_RE;
 		return (p);
 
 	case '$':				/* Last line */
 		a->type = AT_LAST;
 		return (p + 1);
+
+	case '+':				/* Relative line number */
+		a->type = AT_RELLINE;
+		p++;
+		/* FALLTHROUGH */
 						/* Line number */
-	case '0': case '1': case '2': case '3': case '4': 
+	case '0': case '1': case '2': case '3': case '4':
 	case '5': case '6': case '7': case '8': case '9':
-		a->type = AT_LINE;
-		a->u.l = strtol(p, &end, 10);
+		if (a->type == 0)
+			a->type = AT_LINE;
+		a->u.l = strtoul(p, &end, 10);
 		return (end);
 	default:
-		err(COMPILE, "expected context address");
+		errx(1, "%lu: %s: expected context address", linenum, fname);
 		return (NULL);
 	}
 }
@@ -763,16 +831,17 @@ duptoeol(char *s, const char *ctype)
 {
 	size_t len;
 	int ws;
-	char *start;
+	char *p, *start;
 
 	ws = 0;
 	for (start = s; *s != '\0' && *s != '\n'; ++s)
 		ws = isspace((unsigned char)*s);
 	*s = '\0';
 	if (ws)
-		err(WARNING, "whitespace after %s", ctype);
-	len = s - start + 1;
-	return (memmove(xmalloc(len), start, len));
+		warnx("%lu: %s: whitespace after %s", linenum, fname, ctype);
+	len = (size_t)(s - start + 1);
+	p = xmalloc(len);
+	return (memmove(p, start, len));
 }
 
 /*
@@ -800,7 +869,7 @@ fixuplabel(struct s_command *cp, struct 
 				break;
 			}
 			if ((cp->u.c = findlabel(cp->t)) == NULL)
-				err(COMPILE2, "undefined label '%s'", cp->t);
+				errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t);
 			free(cp->t);
 			break;
 		case '{':
@@ -825,7 +894,7 @@ enterlabel(struct s_command *cp)
 	lhp = &labels[h & LHMASK];
 	for (lh = *lhp; lh != NULL; lh = lh->lh_next)
 		if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
-			err(COMPILE2, "duplicate label '%s'", cp->t);
+			errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t);
 	lh = xmalloc(sizeof *lh);
 	lh->lh_next = *lhp;
 	lh->lh_hash = h;
@@ -856,7 +925,7 @@ findlabel(char *name)
 	return (NULL);
 }
 
-/* 
+/*
  * Warn about any unused labels.  As a side effect, release the label hash
  * table space.
  */
@@ -870,8 +939,8 @@ uselabel(void)
 		for (lh = labels[i]; lh != NULL; lh = next) {
 			next = lh->lh_next;
 			if (!lh->lh_ref)
-				err(WARNING, "unused label '%s'",
-				    lh->lh_cmd->t);
+				warnx("%lu: %s: unused label '%s'",
+				    linenum, fname, lh->lh_cmd->t);
 			free(lh);
 		}
 	}

Index: src/usr.bin/sed/defs.h
diff -u src/usr.bin/sed/defs.h:1.10 src/usr.bin/sed/defs.h:1.10.22.1
--- src/usr.bin/sed/defs.h:1.10	Fri Feb 19 16:35:27 2010
+++ src/usr.bin/sed/defs.h	Sun Aug 10 06:58:55 2014
@@ -1,6 +1,7 @@
-/*	$NetBSD: defs.h,v 1.10 2010/02/19 16:35:27 tnn Exp $	*/
+/*	$NetBSD: defs.h,v 1.10.22.1 2014/08/10 06:58:55 tls Exp $	*/
 
 /*-
+ * Copyright (c) 1992 Diomidis Spinellis.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -31,54 +32,17 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	from: @(#)defs.h	8.1 (Berkeley) 6/6/93
- *	$NetBSD: defs.h,v 1.10 2010/02/19 16:35:27 tnn Exp $
- */
-
-/*-
- * Copyright (c) 1992 Diomidis Spinellis.
- *
- * This code is derived from software contributed to Berkeley by
- * Diomidis Spinellis of Imperial College, University of London.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	from: @(#)defs.h	8.1 (Berkeley) 6/6/93
- *	$NetBSD: defs.h,v 1.10 2010/02/19 16:35:27 tnn Exp $
+ *	@(#)defs.h	8.1 (Berkeley) 6/6/93
+ * $FreeBSD: head/usr.bin/sed/defs.h 192732 2009-05-25 06:45:33Z brian $
  */
 
 /*
  * Types of address specifications
  */
 enum e_atype {
-	AT_RE,					/* Line that match RE */
+	AT_RE	    = 1,			/* Line that matches RE */
 	AT_LINE,				/* Specific line */
+	AT_RELLINE,				/* Relative line */
 	AT_LAST					/* Last line */
 };
 
@@ -99,14 +63,28 @@ struct s_addr {
 struct s_subst {
 	int n;					/* Occurrence to subst. */
 	int p;					/* True if p flag */
+	int icase;				/* True if I flag */
 	char *wfile;				/* NULL if no wfile */
 	int wfd;				/* Cached file descriptor */
 	regex_t *re;				/* Regular expression */
-	int maxbref;				/* Largest backreference. */
+	unsigned int maxbref;			/* Largest backreference. */
 	u_long linenum;				/* Line number. */
 	char *new;				/* Replacement text */
 };
 
+/*
+ * Translate command.
+ */
+struct s_tr {
+	unsigned char bytetab[256];
+	struct trmulti {
+		size_t fromlen;
+		char from[MB_LEN_MAX];
+		size_t tolen;
+		char to[MB_LEN_MAX];
+	} *multis;
+	size_t nmultis;
+};
 
 /*
  * An internally compiled command.
@@ -116,16 +94,16 @@ struct s_subst {
 struct s_command {
 	struct s_command *next;			/* Pointer to next command */
 	struct s_addr *a1, *a2;			/* Start and end address */
+	u_long startline;			/* Start line number or zero */
 	char *t;				/* Text for : a c i r w */
 	union {
 		struct s_command *c;		/* Command(s) for b t { */
 		struct s_subst *s;		/* Substitute command */
-		u_char *y;			/* Replace command array */
+		struct s_tr *y;			/* Replace command array */
 		int fd;				/* File descriptor for w */
 	} u;
 	char code;				/* Command code */
 	u_int nonsel:1;				/* True if ! */
-	u_int inrange:1;			/* True if in range */
 };
 
 /*
@@ -170,18 +148,3 @@ typedef struct {
 	char *back;		/* Backing memory. */
 	size_t blen;		/* Backing memory length. */
 } SPACE;
-
-/*
- * Error severity codes:
- */
-#define	FATAL		0	/* Exit immediately with 1 */
-#define	ERROR		1	/* Continue, but change exit value */
-#define	WARNING		2	/* Just print the warning */
-#define	COMPILE		3	/* Print error, count and finish script */
-#define	COMPILE2	3	/* Print error, count and finish script */
-
-/*
- * Round up to the nearest multiple of _POSIX2_LINE_MAX
- */
-#define ROUNDLEN(x) \
-    (((x) + _POSIX2_LINE_MAX - 1) & ~(_POSIX2_LINE_MAX - 1))

Index: src/usr.bin/sed/extern.h
diff -u src/usr.bin/sed/extern.h:1.11 src/usr.bin/sed/extern.h:1.11.22.1
--- src/usr.bin/sed/extern.h:1.11	Fri Feb 19 16:35:27 2010
+++ src/usr.bin/sed/extern.h	Sun Aug 10 06:58:55 2014
@@ -1,6 +1,7 @@
-/*	$NetBSD: extern.h,v 1.11 2010/02/19 16:35:27 tnn Exp $	*/
+/*	$NetBSD: extern.h,v 1.11.22.1 2014/08/10 06:58:55 tls Exp $	*/
 
 /*-
+ * Copyright (c) 1992 Diomidis Spinellis.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -31,46 +32,8 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	from: @(#)extern.h	8.1 (Berkeley) 6/6/93
- *	$NetBSD: extern.h,v 1.11 2010/02/19 16:35:27 tnn Exp $
- */
-
-/*-
- * Copyright (c) 1992 Diomidis Spinellis.
- *
- * This code is derived from software contributed to Berkeley by
- * Diomidis Spinellis of Imperial College, University of London.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	from: @(#)extern.h	8.1 (Berkeley) 6/6/93
- *	$NetBSD: extern.h,v 1.11 2010/02/19 16:35:27 tnn Exp $
+ *	@(#)extern.h	8.1 (Berkeley) 6/6/93
+ * $FreeBSD: head/usr.bin/sed/extern.h 170608 2007-06-12 12:05:24Z yar $
  */
 
 extern struct s_command *prog;
@@ -78,20 +41,21 @@ extern struct s_appends *appends;
 extern regmatch_t *match;
 extern size_t maxnsub;
 extern u_long linenum;
-extern int appendnum;
-extern int lastline;
+extern size_t appendnum;
 extern int aflag, eflag, nflag;
-extern int ere;
-extern const char *fname;
+extern const char *fname, *outfname;
+extern FILE *infile, *outfile;
+extern int rflags;	/* regex flags to use */
 
 void	 cfclose(struct s_command *, struct s_command *);
 void	 compile(void);
 void	 cspace(SPACE *, const char *, size_t, enum e_spflag);
-char	*cu_fgets(char **, size_t *);
-void	 err(int, const char *, ...)
-     __attribute__((__format__(__printf__, 2, 3)));
+char	*cu_fgets(char *, int, int *);
 int	 mf_fgets(SPACE *, enum e_spflag);
+int	 lastline(void);
 void	 process(void);
+void	 resetstate(void);
 char	*strregerror(int, regex_t *);
 void	*xmalloc(size_t);
 void	*xrealloc(void *, size_t);
+void	*xcalloc(size_t, size_t);
Index: src/usr.bin/sed/misc.c
diff -u src/usr.bin/sed/misc.c:1.11 src/usr.bin/sed/misc.c:1.11.22.1
--- src/usr.bin/sed/misc.c:1.11	Fri Feb 19 16:35:27 2010
+++ src/usr.bin/sed/misc.c	Sun Aug 10 06:58:55 2014
@@ -1,6 +1,7 @@
-/*	$NetBSD: misc.c,v 1.11 2010/02/19 16:35:27 tnn Exp $	*/
+/*	$NetBSD: misc.c,v 1.11.22.1 2014/08/10 06:58:55 tls Exp $	*/
 
 /*-
+ * Copyright (c) 1992 Diomidis Spinellis.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -32,59 +33,25 @@
  * SUCH DAMAGE.
  */
 
-/*-
- * Copyright (c) 1992 Diomidis Spinellis.
- *
- * This code is derived from software contributed to Berkeley by
- * Diomidis Spinellis of Imperial College, University of London.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
 #if HAVE_NBTOOL_CONFIG_H
 #include "nbtool_config.h"
 #endif
 
 #include <sys/cdefs.h>
-#ifndef lint
+__RCSID("$NetBSD: misc.c,v 1.11.22.1 2014/08/10 06:58:55 tls Exp $");
+#ifdef __FBSDID
+__FBSDID("$FreeBSD: head/usr.bin/sed/misc.c 200462 2009-12-13 03:14:06Z delphij $");
+#endif
+
 #if 0
-static char sccsid[] = "@(#)misc.c	8.1 (Berkeley) 6/6/93";
-#else
-__RCSID("$NetBSD: misc.c,v 1.11 2010/02/19 16:35:27 tnn Exp $");
+static const char sccsid[] = "@(#)misc.c	8.1 (Berkeley) 6/6/93";
 #endif
-#endif /* not lint */
 
 #include <sys/types.h>
 
-#include <errno.h>
+#include <err.h>
+#include <limits.h>
 #include <regex.h>
-#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -101,8 +68,8 @@ xmalloc(size_t size)
 	void *p;
 
 	if ((p = malloc(size)) == NULL)
-		err(FATAL, "%s", strerror(errno));
-	return (p);
+		err(1, "malloc(%zu)", size);
+	return p;
 }
 
 /*
@@ -115,12 +82,24 @@ xrealloc(void *p, size_t size)
 		return (xmalloc(size));
 
 	if ((p = realloc(p, size)) == NULL)
-		err(FATAL, "%s", strerror(errno));
-	return (p);
+		err(1, "realloc(%zu)", size);
+	return p;
 }
 
 /*
- * Return a string for a regular expression error passed.  This is a overkill,
+ * calloc with result test
+ */
+void *
+xcalloc(size_t c, size_t n)
+{
+	void *p;
+
+	if ((p = calloc(c, n)) == NULL)
+		err(1, "calloc(%zu, %zu)", c, n);
+	return p;
+}
+/*
+ * Return a string for a regular expression error passed.  This is overkill,
  * because of the silly semantics of regerror (we can never know the size of
  * the buffer).
  */
@@ -138,27 +117,3 @@ strregerror(int errcode, regex_t *preg)
 	(void)regerror(errcode, preg, oe, s);
 	return (oe);
 }
-
-/*
- * Error reporting function
- */
-void
-err(int severity, const char *fmt, ...)
-{
-	va_list ap;
-
-	va_start(ap, fmt);
-	(void)fprintf(stderr, "sed: ");
-	switch (severity) {
-	case WARNING:
-	case COMPILE:
-		(void)fprintf(stderr, "%lu: %s: ", linenum, fname);
-	}
-	(void)vfprintf(stderr, fmt, ap);
-	va_end(ap);
-	(void)fprintf(stderr, "\n");
-	if (severity == WARNING)
-		return;
-	exit(1);
-	/* NOTREACHED */
-}

Index: src/usr.bin/sed/main.c
diff -u src/usr.bin/sed/main.c:1.21 src/usr.bin/sed/main.c:1.21.22.1
--- src/usr.bin/sed/main.c:1.21	Fri Feb 19 16:35:27 2010
+++ src/usr.bin/sed/main.c	Sun Aug 10 06:58:55 2014
@@ -1,6 +1,8 @@
-/*	$NetBSD: main.c,v 1.21 2010/02/19 16:35:27 tnn Exp $	*/
+/*	$NetBSD: main.c,v 1.21.22.1 2014/08/10 06:58:55 tls Exp $	*/
 
 /*-
+ * Copyright (c) 2013 Johann 'Myrkraverk' Oskarsson.
+ * Copyright (c) 1992 Diomidis Spinellis.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -32,67 +34,39 @@
  * SUCH DAMAGE.
  */
 
-/*-
- * Copyright (c) 1992 Diomidis Spinellis.
- *
- * This code is derived from software contributed to Berkeley by
- * Diomidis Spinellis of Imperial College, University of London.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
 #if HAVE_NBTOOL_CONFIG_H
 #include "nbtool_config.h"
 #endif
 
 #include <sys/cdefs.h>
+__RCSID("$NetBSD: main.c,v 1.21.22.1 2014/08/10 06:58:55 tls Exp $");
+#ifdef __FBSDID
+__FBSDID("$FreeBSD: head/usr.bin/sed/main.c 252231 2013-06-26 04:14:19Z pfg $");
+#endif
+
 #ifndef lint
 __COPYRIGHT("@(#) Copyright (c) 1992, 1993\
- The Regents of the University of California.  All rights reserved.");
-#endif /* not lint */
+	The Regents of the University of California.  All rights reserved.");
+#endif
 
-#ifndef lint
 #if 0
-static char sccsid[] = "@(#)main.c	8.2 (Berkeley) 1/3/94";
-#else
-__RCSID("$NetBSD: main.c,v 1.21 2010/02/19 16:35:27 tnn Exp $");
+static const char sccsid[] = "@(#)main.c	8.2 (Berkeley) 1/3/94";
 #endif
-#endif /* not lint */
 
 #include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/stat.h>
 
-#include <ctype.h>
+#include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <libgen.h>
 #include <limits.h>
+#include <locale.h>
 #include <regex.h>
 #include <stddef.h>
+#define _WITH_GETLINE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -130,53 +104,96 @@ struct s_flist {
  */
 static struct s_flist *files, **fl_nextp = &files;
 
-int aflag, eflag, nflag, ere;
+FILE *infile;			/* Current input file */
+FILE *outfile;			/* Current output file */
+
+int aflag, eflag, nflag;
+int rflags = 0;
+static int rval;		/* Exit status */
+
+static int ispan;		/* Whether inplace editing spans across files */
 
 /*
  * Current file and line number; line numbers restart across compilation
- * units, but span across input files.
+ * units, but span across input files.  The latter is optional if editing
+ * in place.
  */
-const char *fname;			/* File name. */
+const char *fname;		/* File name. */
+const char *outfname;		/* Output file name */
+static char oldfname[PATH_MAX];	/* Old file name (for in-place editing) */
+static char tmpfname[PATH_MAX];	/* Temporary file name (for in-place editing) */
+static const char *inplace;	/* Inplace edit file extension. */
 u_long linenum;
-int lastline;			/* TRUE on the last line of the last file */
 
 static void add_compunit(enum e_cut, char *);
 static void add_file(char *);
-int	main(int, char **);
+static void usage(void) __dead;
 
 int
 main(int argc, char *argv[])
 {
 	int c, fflag;
+	char *temp_arg;
+
+	setprogname(argv[0]);
+	(void) setlocale(LC_ALL, "");
 
-	setprogname(*argv);
 	fflag = 0;
-	while ((c = getopt(argc, argv, "ae:f:nrE")) != -1)
+	inplace = NULL;
+
+	while ((c = getopt(argc, argv, "EI::ae:f:i::lnru")) != -1)
 		switch (c) {
+		case 'r':		/* Gnu sed compat */
+		case 'E':
+			rflags = REG_EXTENDED;
+			break;
+		case 'I':
+			inplace = optarg ? optarg : __UNCONST("");
+			ispan = 1;	/* span across input files */
+			break;
 		case 'a':
 			aflag = 1;
 			break;
 		case 'e':
 			eflag = 1;
-			add_compunit(CU_STRING, optarg);
+			temp_arg = xmalloc(strlen(optarg) + 2);
+			strcpy(temp_arg, optarg);
+			strcat(temp_arg, "\n");
+			add_compunit(CU_STRING, temp_arg);
 			break;
 		case 'f':
 			fflag = 1;
 			add_compunit(CU_FILE, optarg);
 			break;
+		case 'i':
+			inplace = optarg ? optarg : __UNCONST("");
+			ispan = 0;	/* don't span across input files */
+			break;
+		case 'l':
+#ifdef _IOLBF
+			c = setvbuf(stdout, NULL, _IOLBF, 0);
+#else
+			c = setlinebuf(stdout);
+#endif
+			if (c)
+				warn("setting line buffered output failed");
+			break;
 		case 'n':
 			nflag = 1;
 			break;
-		case 'r':
-		case 'E':
-			ere = REG_EXTENDED;
+		case 'u':
+#ifdef _IONBF
+			c = setvbuf(stdout, NULL, _IONBF, 0);
+#else
+			c = -1;
+			errno = EOPNOTSUPP;
+#endif
+			if (c)
+				warn("setting unbuffered output failed");
 			break;
 		default:
 		case '?':
-			(void)fprintf(stderr,
-"usage:\t%s [-aEnr] script [file ...]\n\t%s [-aEnr] [-e script] ... [-f script_file] ... [file ...]\n",
-			    getprogname(), getprogname());
-			exit(1);
+			usage();
 		}
 	argc -= optind;
 	argv += optind;
@@ -198,8 +215,18 @@ main(int argc, char *argv[])
 	process();
 	cfclose(prog, NULL);
 	if (fclose(stdout))
-		err(FATAL, "stdout: %s", strerror(errno));
-	exit (0);
+		err(1, "stdout");
+	exit(rval);
+}
+
+static void
+usage(void)
+{
+	(void)fprintf(stderr,
+	    "Usage:  %s [-aElnru] command [file ...]\n"
+	    "\t%s [-aElnru] [-e command] [-f command_file] [-I[extension]]\n"
+	    "\t    [-i[extension]] [file ...]\n", getprogname(), getprogname());
+	exit(1);
 }
 
 /*
@@ -207,36 +234,34 @@ main(int argc, char *argv[])
  * together.  Empty strings and files are ignored.
  */
 char *
-cu_fgets(char **outbuf, size_t *outsize)
+cu_fgets(char *buf, int n, int *more)
 {
 	static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF;
 	static FILE *f;		/* Current open file */
 	static char *s;		/* Current pointer inside string */
 	static char string_ident[30];
-	size_t len;
 	char *p;
 
-	if (*outbuf == NULL)
-		*outsize = 0;
-
 again:
 	switch (state) {
 	case ST_EOF:
-		if (script == NULL)
+		if (script == NULL) {
+			if (more != NULL)
+				*more = 0;
 			return (NULL);
+		}
 		linenum = 0;
 		switch (script->type) {
 		case CU_FILE:
 			if ((f = fopen(script->s, "r")) == NULL)
-				err(FATAL,
-				    "%s: %s", script->s, strerror(errno));
+				err(1, "%s", script->s);
 			fname = script->s;
 			state = ST_FILE;
 			goto again;
 		case CU_STRING:
-			if ((snprintf(string_ident,
+			if (((size_t)snprintf(string_ident,
 			    sizeof(string_ident), "\"%s\"", script->s)) >=
-			    (int)(sizeof(string_ident) - 1))
+			    sizeof(string_ident) - 1)
 				(void)strcpy(string_ident +
 				    sizeof(string_ident) - 6, " ...\"");
 			fname = string_ident;
@@ -245,18 +270,13 @@ again:
 			goto again;
 		}
 	case ST_FILE:
-		if ((p = fgetln(f, &len)) != NULL) {
+		if ((p = fgets(buf, n, f)) != NULL) {
 			linenum++;
-			if (len >= *outsize) {
-				free(*outbuf);
-				*outsize = ROUNDLEN(len + 1);
-				*outbuf = xmalloc(*outsize);
-			}
-			memcpy(*outbuf, p, len);
-			(*outbuf)[len] = '\0';
-			if (linenum == 1 && p[0] == '#' && p[1] == 'n')
+			if (linenum == 1 && buf[0] == '#' && buf[1] == 'n')
 				nflag = 1;
-			return (*outbuf);
+			if (more != NULL)
+				*more = !feof(f);
+			return (p);
 		}
 		script = script->next;
 		(void)fclose(f);
@@ -265,15 +285,14 @@ again:
 	case ST_STRING:
 		if (linenum == 0 && s[0] == '#' && s[1] == 'n')
 			nflag = 1;
-		p = *outbuf;
-		len = *outsize;
+		p = buf;
 		for (;;) {
-			if (len <= 1) {
-				*outbuf = xrealloc(*outbuf,
-				    *outsize + _POSIX2_LINE_MAX);
-				p = *outbuf + *outsize - len;
-				len += _POSIX2_LINE_MAX;
-				*outsize += _POSIX2_LINE_MAX;
+			if (n-- <= 1) {
+				*p = '\0';
+				linenum++;
+				if (more != NULL)
+					*more = 1;
+				return (buf);
 			}
 			switch (*s) {
 			case '\0':
@@ -285,17 +304,20 @@ again:
 					script = script->next;
 					*p = '\0';
 					linenum++;
-					return (*outbuf);
+					if (more != NULL)
+						*more = 0;
+					return (buf);
 				}
 			case '\n':
 				*p++ = '\n';
 				*p = '\0';
 				s++;
 				linenum++;
-				return (*outbuf);
+				if (more != NULL)
+					*more = 0;
+				return (buf);
 			default:
 				*p++ = *s++;
-				len--;
 			}
 		}
 	}
@@ -310,69 +332,143 @@ again:
 int
 mf_fgets(SPACE *sp, enum e_spflag spflag)
 {
-	static FILE *f;		/* Current open file */
+	struct stat sb;
 	size_t len;
-	char *p;
+	static char *p = NULL;
+	static size_t plen = 0;
 	int c;
+	static int firstfile;
 
-	if (f == NULL)
-		/* Advance to first non-empty file */
-		for (;;) {
-			if (files == NULL) {
-				lastline = 1;
-				return (0);
-			}
-			if (files->fname == NULL) {
-				f = stdin;
-				fname = "stdin";
-			} else {
-				fname = files->fname;
-				if ((f = fopen(fname, "r")) == NULL)
-					err(FATAL, "%s: %s",
-					    fname, strerror(errno));
+	if (infile == NULL) {
+		/* stdin? */
+		if (files->fname == NULL) {
+			if (inplace != NULL)
+				errx(1, "-I or -i may not be used with stdin");
+			infile = stdin;
+			fname = "stdin";
+			outfile = stdout;
+			outfname = "stdout";
+		}
+		firstfile = 1;
+	}
+
+	for (;;) {
+		if (infile != NULL && (c = getc(infile)) != EOF) {
+			(void)ungetc(c, infile);
+			break;
+		}
+		/* If we are here, either we hit EOF or no file is open yet */
+		if (infile == stdin) {
+			sp->len = 0;
+			return (0);
+		}
+		if (infile != NULL) {
+			fclose(infile);
+			if (*oldfname != '\0') {
+				/* if there was a backup file, remove it */
+				unlink(oldfname);
+				/*
+				 * Back up the original.  Note that hard links
+				 * are not supported on all filesystems.
+				 */
+				if ((link(fname, oldfname) != 0) &&
+				   (rename(fname, oldfname) != 0)) {
+					warn("rename()");
+					if (*tmpfname)
+						unlink(tmpfname);
+					exit(1);
+				}
+				*oldfname = '\0';
 			}
-			if ((c = getc(f)) != EOF) {
-				(void)ungetc(c, f);
-				break;
+			if (*tmpfname != '\0') {
+				if (outfile != NULL && outfile != stdout)
+					if (fclose(outfile) != 0) {
+						warn("fclose()");
+						unlink(tmpfname);
+						exit(1);
+					}
+				outfile = NULL;
+				if (rename(tmpfname, fname) != 0) {
+					/* this should not happen really! */
+					warn("rename()");
+					unlink(tmpfname);
+					exit(1);
+				}
+				*tmpfname = '\0';
 			}
-			(void)fclose(f);
+			outfname = NULL;
+		}
+		if (firstfile == 0)
 			files = files->next;
+		else
+			firstfile = 0;
+		if (files == NULL) {
+			sp->len = 0;
+			return (0);
+		}
+		fname = files->fname;
+		if (inplace != NULL) {
+			if (lstat(fname, &sb) != 0)
+				err(1, "%s", fname);
+			if (!(sb.st_mode & S_IFREG))
+				errx(1, "%s: %s %s", fname,
+				    "in-place editing only",
+				    "works for regular files");
+			if (*inplace != '\0') {
+				strlcpy(oldfname, fname,
+				    sizeof(oldfname));
+				len = strlcat(oldfname, inplace,
+				    sizeof(oldfname));
+				if (len > sizeof(oldfname))
+					errx(1, "%s: name too long", fname);
+			}
+			char d_name[PATH_MAX], f_name[PATH_MAX];
+			(void)strlcpy(d_name, fname, sizeof(d_name));
+			(void)strlcpy(f_name, fname, sizeof(f_name));
+			len = (size_t)snprintf(tmpfname, sizeof(tmpfname),
+			    "%s/.!%ld!%s", dirname(d_name), (long)getpid(),
+			    basename(f_name));
+			if (len >= sizeof(tmpfname))
+				errx(1, "%s: name too long", fname);
+			unlink(tmpfname);
+			if (outfile != NULL && outfile != stdout)
+				fclose(outfile);
+			if ((outfile = fopen(tmpfname, "w")) == NULL)
+				err(1, "%s", fname);
+			fchown(fileno(outfile), sb.st_uid, sb.st_gid);
+			fchmod(fileno(outfile), sb.st_mode & ALLPERMS);
+			outfname = tmpfname;
+			if (!ispan) {
+				linenum = 0;
+				resetstate();
+			}
+		} else {
+			outfile = stdout;
+			outfname = "stdout";
+		}
+		if ((infile = fopen(fname, "r")) == NULL) {
+			warn("%s", fname);
+			rval = 1;
+			continue;
 		}
-
-	if (lastline) {
-		sp->len = 0;
-		return (0);
 	}
-
 	/*
-	 * Use fgetln so that we can handle essentially infinite input data.
-	 * Can't use the pointer into the stdio buffer as the process space
-	 * because the ungetc() can cause it to move.
+	 * We are here only when infile is open and we still have something
+	 * to read from it.
+	 *
+	 * Use getline() so that we can handle essentially infinite input
+	 * data.  The p and plen are static so each invocation gives
+	 * getline() the same buffer which is expanded as needed.
 	 */
-	p = fgetln(f, &len);
-	if (ferror(f))
-		err(FATAL, "%s: %s", fname, strerror(errno ? errno : EIO));
-	cspace(sp, p, len, spflag);
+	ssize_t slen = getline(&p, &plen, infile);
+	if (slen == -1)
+		err(1, "%s", fname);
+	if (slen != 0 && p[slen - 1] == '\n')
+		slen--;
+	cspace(sp, p, (size_t)slen, spflag);
 
 	linenum++;
-	/* Advance to next non-empty file */
-	while ((c = getc(f)) == EOF) {
-		(void)fclose(f);
-		files = files->next;
-		if (files == NULL) {
-			lastline = 1;
-			return (1);
-		}
-		if (files->fname == NULL) {
-			f = stdin;
-			fname = "stdin";
-		} else {
-			fname = files->fname;
-			if ((f = fopen(fname, "r")) == NULL)
-				err(FATAL, "%s: %s", fname, strerror(errno));
-		}
-	}
-	(void)ungetc(c, f);
+
 	return (1);
 }
 
@@ -406,3 +502,16 @@ add_file(char *s)
 	fp->fname = s;
 	fl_nextp = &fp->next;
 }
+
+int
+lastline(void)
+{
+	int ch;
+
+	if (files->next != NULL && (inplace == NULL || ispan))
+		return (0);
+	if ((ch = getc(infile)) == EOF)
+		return (1);
+	ungetc(ch, infile);
+	return (0);
+}

Index: src/usr.bin/sed/process.c
diff -u src/usr.bin/sed/process.c:1.39 src/usr.bin/sed/process.c:1.39.6.1
--- src/usr.bin/sed/process.c:1.39	Sun Mar 17 21:02:54 2013
+++ src/usr.bin/sed/process.c	Sun Aug 10 06:58:55 2014
@@ -1,6 +1,7 @@
-/*	$NetBSD: process.c,v 1.39 2013/03/17 21:02:54 uwe Exp $	*/
+/*	$NetBSD: process.c,v 1.39.6.1 2014/08/10 06:58:55 tls Exp $	*/
 
 /*-
+ * Copyright (c) 1992 Diomidis Spinellis.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -32,53 +33,19 @@
  * SUCH DAMAGE.
  */
 
-/*-
- * Copyright (c) 1992 Diomidis Spinellis.
- *
- * This code is derived from software contributed to Berkeley by
- * Diomidis Spinellis of Imperial College, University of London.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
 #if HAVE_NBTOOL_CONFIG_H
 #include "nbtool_config.h"
 #endif
 
 #include <sys/cdefs.h>
-#ifndef lint
+__RCSID("$NetBSD: process.c,v 1.39.6.1 2014/08/10 06:58:55 tls Exp $");
+#ifdef __FBSDID
+__FBSDID("$FreeBSD: head/usr.bin/sed/process.c 192732 2009-05-25 06:45:33Z brian $");
+#endif
+
 #if 0
-static char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
-#else
-__RCSID("$NetBSD: process.c,v 1.39 2013/03/17 21:02:54 uwe Exp $");
+static const char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
 #endif
-#endif /* not lint */
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -86,6 +53,7 @@ __RCSID("$NetBSD: process.c,v 1.39 2013/
 #include <sys/uio.h>
 
 #include <ctype.h>
+#include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
@@ -94,27 +62,30 @@ __RCSID("$NetBSD: process.c,v 1.39 2013/
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "defs.h"
 #include "extern.h"
 
-static SPACE HS, PS, SS;
+static SPACE HS, PS, SS, YS;
 #define	pd		PS.deleted
 #define	ps		PS.space
 #define	psl		PS.len
 #define	hs		HS.space
 #define	hsl		HS.len
 
-static inline int	 applies(struct s_command *);
+static __inline int	 applies(struct s_command *);
+static void		 do_tr(struct s_tr *);
 static void		 flush_appends(void);
-static void		 lputs(char *);
-static inline int	 regexec_e(regex_t *, const char *, int, int, size_t);
+static void		 lputs(char *, size_t);
+static __inline int	 regexec_e(regex_t *, const char *, int, int, size_t);
 static void		 regsub(SPACE *, char *, char *);
 static int		 substitute(struct s_command *);
 
 struct s_appends *appends;	/* Array of pointers to strings to append. */
-static int appendx;		/* Index into appends array. */
-int appendnum;			/* Size of appends array. */
+static size_t appendx;		/* Index into appends array. */
+size_t appendnum;			/* Size of appends array. */
 
 static int lastaddr;		/* Set by applies if last address of a range. */
 static int sdone;		/* If any substitutes since last line input. */
@@ -123,17 +94,18 @@ static regex_t *defpreg;
 size_t maxnsub;
 regmatch_t *match;
 
-#define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
+#define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
 
 void
 process(void)
 {
 	struct s_command *cp;
 	SPACE tspace;
-	size_t len, oldpsl;
+	size_t oldpsl = 0;
 	char *p;
 
-	oldpsl = 0;
+	p = NULL;
+
 	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
 		pd = 0;
 top:
@@ -149,12 +121,10 @@ redirect:
 				cp = cp->u.c;
 				goto redirect;
 			case 'a':
-				if (appendx >= appendnum) {
+				if (appendx >= appendnum)
 					appends = xrealloc(appends,
 					    sizeof(struct s_appends) *
-					    (appendnum * 2));
-					appendnum *= 2;
-				}
+					    (appendnum *= 2));
 				appends[appendx].type = AP_STRING;
 				appends[appendx].s = cp->t;
 				appends[appendx].len = strlen(cp->t);
@@ -166,22 +136,21 @@ redirect:
 			case 'c':
 				pd = 1;
 				psl = 0;
-				if (cp->a2 == NULL || lastaddr)
-					(void)printf("%s", cp->t);
+				if (cp->a2 == NULL || lastaddr || lastline())
+					(void)fprintf(outfile, "%s", cp->t);
 				goto new;
 			case 'd':
 				pd = 1;
 				goto new;
 			case 'D':
-				if (psl == 0)
-					pd = 1;
 				if (pd)
 					goto new;
-				if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
+				if (psl == 0 ||
+				    (p = memchr(ps, '\n', psl - 1)) == NULL) {
 					pd = 1;
 					goto new;
 				} else {
-					psl -= (p + 1) - ps;
+					psl -= (size_t)((p + 1) - ps);
 					memmove(ps, p + 1, psl);
 					goto top;
 				}
@@ -189,25 +158,25 @@ redirect:
 				cspace(&PS, hs, hsl, REPLACE);
 				break;
 			case 'G':
-				if (hs == NULL)
-					cspace(&HS, "\n", 1, REPLACE);
-				cspace(&PS, hs, hsl, 0);
+				cspace(&PS, "\n", 1, APPEND);
+				cspace(&PS, hs, hsl, APPEND);
 				break;
 			case 'h':
 				cspace(&HS, ps, psl, REPLACE);
 				break;
 			case 'H':
-				cspace(&HS, ps, psl, 0);
+				cspace(&HS, "\n", 1, APPEND);
+				cspace(&HS, ps, psl, APPEND);
 				break;
 			case 'i':
-				(void)printf("%s", cp->t);
+				(void)fprintf(outfile, "%s", cp->t);
 				break;
 			case 'l':
-				lputs(ps);
+				lputs(ps, psl);
 				break;
 			case 'n':
 				if (!nflag && !pd)
-					OUT(ps)
+					OUT();
 				flush_appends();
 				if (!mf_fgets(&PS, REPLACE))
 					exit(0);
@@ -215,40 +184,36 @@ redirect:
 				break;
 			case 'N':
 				flush_appends();
-				if (!mf_fgets(&PS, 0)) {
-					if (!nflag && !pd)
-						OUT(ps)
+				cspace(&PS, "\n", 1, APPEND);
+				if (!mf_fgets(&PS, APPEND))
 					exit(0);
-				}
 				break;
 			case 'p':
 				if (pd)
 					break;
-				OUT(ps)
+				OUT();
 				break;
 			case 'P':
 				if (pd)
 					break;
 				if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
 					oldpsl = psl;
-					psl = (p + 1) - ps;
+					psl = (size_t)(p - ps);
 				}
-				OUT(ps)
+				OUT();
 				if (p != NULL)
 					psl = oldpsl;
 				break;
 			case 'q':
 				if (!nflag && !pd)
-					OUT(ps)
+					OUT();
 				flush_appends();
 				exit(0);
 			case 'r':
-				if (appendx >= appendnum) {
+				if (appendx >= appendnum)
 					appends = xrealloc(appends,
 					    sizeof(struct s_appends) *
-					    (appendnum * 2));
-					appendnum *= 2;
-				}
+					    (appendnum *= 2));
 				appends[appendx].type = AP_FILE;
 				appends[appendx].s = cp->t;
 				appends[appendx].len = strlen(cp->t);
@@ -270,36 +235,40 @@ redirect:
 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
 				    DEFFILEMODE)) == -1)
-					err(FATAL, "%s: %s",
-					    cp->t, strerror(errno));
-				if ((size_t)write(cp->u.fd, ps, psl) != psl)
-					err(FATAL, "%s: %s",
-					    cp->t, strerror(errno));
+					err(1, "%s", cp->t);
+				if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
+				    write(cp->u.fd, "\n", 1) != 1)
+					err(1, "%s", cp->t);
 				break;
 			case 'x':
+				/*
+				 * If the hold space is null, make it empty
+				 * but not null.  Otherwise the pattern space
+				 * will become null after the swap, which is
+				 * an abnormal condition.
+				 */
 				if (hs == NULL)
-					cspace(&HS, "\n", 1, REPLACE);
+					cspace(&HS, "", 0, REPLACE);
 				tspace = PS;
 				PS = HS;
 				HS = tspace;
 				break;
 			case 'y':
-				if (pd)
+				if (pd || psl == 0)
 					break;
-				for (p = ps, len = psl; --len; ++p)
-					*p = cp->u.y[(int)*p];
+				do_tr(cp->u.y);
 				break;
 			case ':':
 			case '}':
 				break;
 			case '=':
-				(void)printf("%lu\n", linenum);
+				(void)fprintf(outfile, "%lu\n", linenum);
 			}
 			cp = cp->next;
 		} /* for all cp */
 
 new:		if (!nflag && !pd)
-			OUT(ps)
+			OUT();
 		flush_appends();
 	} /* for all lines */
 }
@@ -308,15 +277,15 @@ new:		if (!nflag && !pd)
  * TRUE if the address passed matches the current program state
  * (lastline, linenumber, ps).
  */
-#define	MATCH(a)						\
-	(a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
-	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
+#define	MATCH(a)							\
+	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
+	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
 
 /*
- * Return TRUE if the command applies to the current line.  Sets the inrange
- * flag to process ranges.  Interprets the non-select (``!'') flag.
+ * Return TRUE if the command applies to the current line.  Sets the start
+ * line for process ranges.  Interprets the non-select (``!'') flag.
  */
-static inline int
+static __inline int
 applies(struct s_command *cp)
 {
 	int r;
@@ -324,34 +293,71 @@ applies(struct s_command *cp)
 	lastaddr = 0;
 	if (cp->a1 == NULL && cp->a2 == NULL)
 		r = 1;
-	else if (cp->a2) {
-		if (cp->inrange) {
+	else if (cp->a2)
+		if (cp->startline > 0) {
 			if (MATCH(cp->a2)) {
-				cp->inrange = 0;
+				cp->startline = 0;
 				lastaddr = 1;
-			}
-			r = 1;
+				r = 1;
+			} else if (linenum - cp->startline <= cp->a2->u.l)
+				r = 1;
+			else if ((cp->a2->type == AT_LINE &&
+				   linenum > cp->a2->u.l) ||
+				   (cp->a2->type == AT_RELLINE &&
+				   linenum - cp->startline > cp->a2->u.l)) {
+				/*
+				 * We missed the 2nd address due to a branch,
+				 * so just close the range and return false.
+				 */
+				cp->startline = 0;
+				r = 0;
+			} else
+				r = 1;
 		} else if (cp->a1 && MATCH(cp->a1)) {
 			/*
 			 * If the second address is a number less than or
 			 * equal to the line number first selected, only
 			 * one line shall be selected.
 			 *	-- POSIX 1003.2
+			 * Likewise if the relative second line address is zero.
 			 */
-			if (cp->a2->type == AT_LINE &&
-			    linenum >= cp->a2->u.l)
+			if ((cp->a2->type == AT_LINE &&
+			    linenum >= cp->a2->u.l) ||
+			    (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
 				lastaddr = 1;
-			else
-				cp->inrange = 1;
+			else {
+				cp->startline = linenum;
+			}
 			r = 1;
 		} else
 			r = 0;
-	} else
+	else
 		r = MATCH(cp->a1);
 	return (cp->nonsel ? ! r : r);
 }
 
 /*
+ * Reset the sed processor to its initial state.
+ */
+void
+resetstate(void)
+{
+	struct s_command *cp;
+
+	/*
+	 * Reset all in-range markers.
+	 */
+	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
+		if (cp->a2)
+			cp->startline = 0;
+
+	/*
+	 * Clear out the hold space.
+	 */
+	cspace(&HS, "", 0, REPLACE);
+}
+
+/*
  * substitute --
  *	Do substitutions in the pattern space.  Currently, we build a
  *	copy of the new pattern space in the substitute space structure
@@ -362,24 +368,24 @@ substitute(struct s_command *cp)
 {
 	SPACE tspace;
 	regex_t *re;
-	size_t re_off, slen;
+	regoff_t re_off, slen;
 	int lastempty, n;
 	char *s;
 
 	s = ps;
 	re = cp->u.s->re;
 	if (re == NULL) {
-		if (defpreg != NULL && (size_t)cp->u.s->maxbref > defpreg->re_nsub) {
+		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
 			linenum = cp->u.s->linenum;
-			err(COMPILE, "\\%d not defined in the RE",
-			    cp->u.s->maxbref);
+			errx(1, "%lu: %s: \\%u not defined in the RE",
+					linenum, fname, cp->u.s->maxbref);
 		}
 	}
 	if (!regexec_e(re, s, 0, 0, psl))
 		return (0);
 
 	SS.len = 0;				/* Clean substitute space. */
-	slen = psl;
+	slen = (regoff_t)psl;
 	n = cp->u.s->n;
 	lastempty = 1;
 
@@ -390,7 +396,7 @@ substitute(struct s_command *cp)
 				/* Locate start of replaced string. */
 				re_off = match[0].rm_so;
 				/* Copy leading retained string. */
-				cspace(&SS, s, re_off, APPEND);
+				cspace(&SS, s, (size_t)re_off, APPEND);
 				/* Add in regular expression. */
 				regsub(&SS, s, cp->u.s->new);
 			}
@@ -401,26 +407,27 @@ substitute(struct s_command *cp)
 				slen -= match[0].rm_eo;
 				lastempty = 0;
 			} else {
-				if (match[0].rm_so == 0)
-					cspace(&SS,
-					    s, match[0].rm_so + 1, APPEND);
-				else
-					cspace(&SS,
-					    s + match[0].rm_so, 1, APPEND);
+				if (match[0].rm_so < slen)
+					cspace(&SS, s + match[0].rm_so, 1,
+					    APPEND);
 				s += match[0].rm_so + 1;
 				slen -= match[0].rm_so + 1;
 				lastempty = 1;
 			}
-		} while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
+		} while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen));
 		/* Copy trailing retained string. */
 		if (slen > 0)
-			cspace(&SS, s, slen, APPEND);
+			cspace(&SS, s, (size_t)slen, APPEND);
 		break;
 	default:				/* Nth occurrence */
 		while (--n) {
+			if (match[0].rm_eo == match[0].rm_so)
+				match[0].rm_eo = match[0].rm_so + 1;
 			s += match[0].rm_eo;
 			slen -= match[0].rm_eo;
-			if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
+			if (slen < 0)
+				return (0);
+			if (!regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen))
 				return (0);
 		}
 		/* FALLTHROUGH */
@@ -428,13 +435,13 @@ substitute(struct s_command *cp)
 		/* Locate start of replaced string. */
 		re_off = match[0].rm_so + (s - ps);
 		/* Copy leading retained string. */
-		cspace(&SS, ps, re_off, APPEND);
+		cspace(&SS, ps, (size_t)re_off, APPEND);
 		/* Add in regular expression. */
 		regsub(&SS, s, cp->u.s->new);
 		/* Copy trailing retained string. */
 		s += match[0].rm_eo;
 		slen -= match[0].rm_eo;
-		cspace(&SS, s, slen, APPEND);
+		cspace(&SS, s, (size_t)slen, APPEND);
 		break;
 	}
 
@@ -449,20 +456,76 @@ substitute(struct s_command *cp)
 
 	/* Handle the 'p' flag. */
 	if (cp->u.s->p)
-		OUT(ps)
+		OUT();
 
 	/* Handle the 'w' flag. */
 	if (cp->u.s->wfile && !pd) {
 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
-			err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
-		if ((size_t)write(cp->u.s->wfd, ps, psl) != psl)
-			err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
+			err(1, "%s", cp->u.s->wfile);
+		if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
+		    write(cp->u.s->wfd, "\n", 1) != 1)
+			err(1, "%s", cp->u.s->wfile);
 	}
 	return (1);
 }
 
 /*
+ * do_tr --
+ *	Perform translation ('y' command) in the pattern space.
+ */
+static void
+do_tr(struct s_tr *y)
+{
+	SPACE tmp;
+	char c, *p;
+	size_t clen, left;
+	size_t i;
+
+	if (MB_CUR_MAX == 1) {
+		/*
+		 * Single-byte encoding: perform in-place translation
+		 * of the pattern space.
+		 */
+		for (p = ps; p < &ps[psl]; p++)
+			*p = (char)y->bytetab[(u_char)*p];
+	} else {
+		/*
+		 * Multi-byte encoding: perform translation into the
+		 * translation space, then swap the translation and
+		 * pattern spaces.
+		 */
+		/* Clean translation space. */
+		YS.len = 0;
+		for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
+			if ((c = (char)y->bytetab[(u_char)*p]) != '\0') {
+				cspace(&YS, &c, 1, APPEND);
+				clen = 1;
+				continue;
+			}
+			for (i = 0; i < y->nmultis; i++)
+				if (left >= y->multis[i].fromlen &&
+				    memcmp(p, y->multis[i].from,
+				    y->multis[i].fromlen) == 0)
+					break;
+			if (i < y->nmultis) {
+				cspace(&YS, y->multis[i].to,
+				    y->multis[i].tolen, APPEND);
+				clen = y->multis[i].fromlen;
+			} else {
+				cspace(&YS, p, 1, APPEND);
+				clen = 1;
+			}
+		}
+		/* Swap the translation space and the pattern space. */
+		tmp = PS;
+		PS = YS;
+		YS = tmp;
+		YS.space = YS.back;
+	}
+}
+
+/*
  * Flush append requests.  Always called before reading a line,
  * therefore it also resets the substitution done (sdone) flag.
  */
@@ -470,51 +533,58 @@ static void
 flush_appends(void)
 {
 	FILE *f;
-	int count, i;
+	size_t count, i;
 	char buf[8 * 1024];
 
-	for (i = 0; i < appendx; i++) 
+	for (i = 0; i < appendx; i++)
 		switch (appends[i].type) {
 		case AP_STRING:
-			fwrite(appends[i].s, sizeof(char), appends[i].len, 
-			    stdout);
+			fwrite(appends[i].s, sizeof(char), appends[i].len,
+			    outfile);
 			break;
 		case AP_FILE:
 			/*
 			 * Read files probably shouldn't be cached.  Since
 			 * it's not an error to read a non-existent file,
 			 * it's possible that another program is interacting
-			 * with the sed script through the file system.  It
+			 * with the sed script through the filesystem.  It
 			 * would be truly bizarre, but possible.  It's probably
 			 * not that big a performance win, anyhow.
 			 */
 			if ((f = fopen(appends[i].s, "r")) == NULL)
 				break;
-			while ((count =
-			    fread(buf, sizeof(char), sizeof(buf), f)) > 0)
-				(void)fwrite(buf, sizeof(char), count, stdout);
+			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
+				(void)fwrite(buf, sizeof(char), count, outfile);
 			(void)fclose(f);
 			break;
 		}
-	if (ferror(stdout))
-		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
-	appendx = sdone = 0;
+	if (ferror(outfile))
+		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
+	appendx = 0;
+	sdone = 0;
 }
 
 static void
-lputs(char *s)
+lputs(char *s, size_t len)
 {
-	int count;
-	const char *escapes, *p;
-#ifndef HAVE_NBTOOL_CONFIG_H
+	static const char escapes[] = "\\\a\b\f\r\t\v";
+	int c;
+	size_t col, width;
+	const char *p;
+#ifdef TIOCGWINSZ
 	struct winsize win;
 #endif
-	static int termwidth = -1;
-
-	if (termwidth == -1) {
-		if ((p = getenv("COLUMNS")) != NULL)
-			termwidth = atoi(p);
-#ifndef HAVE_NBTOOL_CONFIG_H
+	static size_t termwidth = (size_t)-1;
+	size_t clen, i;
+	wchar_t wc;
+	mbstate_t mbs;
+
+	if (outfile != stdout)
+		termwidth = 60;
+	if (termwidth == (size_t)-1) {
+		if ((p = getenv("COLUMNS")) && *p != '\0')
+			termwidth = (size_t)atoi(p);
+#ifdef TIOCGWINSZ
 		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
 		    win.ws_col > 0)
 			termwidth = win.ws_col;
@@ -522,54 +592,81 @@ lputs(char *s)
 		else
 			termwidth = 60;
 	}
-	for (count = 0; *s; ++s) { 
-		if (count >= termwidth) {
-			(void)printf("\\\n");
-			count = 0;
+	if (termwidth == 0)
+		termwidth = 1;
+
+	memset(&mbs, 0, sizeof(mbs));
+	col = 0;
+	while (len != 0) {
+		clen = mbrtowc(&wc, s, len, &mbs);
+		if (clen == 0)
+			clen = 1;
+		if (clen == (size_t)-1 || clen == (size_t)-2) {
+			wc = (unsigned char)*s;
+			clen = 1;
+			memset(&mbs, 0, sizeof(mbs));
 		}
-		if (isascii((unsigned char)*s) && isprint((unsigned char)*s) &&
-		    *s != '\\') {
-			(void)putchar(*s);
-			count++;
+		if (wc == '\n') {
+			if (col + 1 >= termwidth)
+				fprintf(outfile, "\\\n");
+			fputc('$', outfile);
+			fputc('\n', outfile);
+			col = 0;
+		} else if (iswprint(wc)) {
+			width = (size_t)wcwidth(wc);
+			if (col + width >= termwidth) {
+				fprintf(outfile, "\\\n");
+				col = 0;
+			}
+			fwrite(s, 1, clen, outfile);
+			col += width;
+		} else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
+		    (p = strchr(escapes, c)) != NULL) {
+			if (col + 2 >= termwidth) {
+				fprintf(outfile, "\\\n");
+				col = 0;
+			}
+			fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
+			col += 2;
 		} else {
-			escapes = "\\\a\b\f\n\r\t\v";
-			(void)putchar('\\');
-			if ((p = strchr(escapes, *s)) != NULL) {
-				(void)putchar("\\abfnrtv"[p - escapes]);
-				count += 2;
-			} else {
-				(void)printf("%03o", *(u_char *)s);
-				count += 4;
+			if (col + 4 * clen >= termwidth) {
+				fprintf(outfile, "\\\n");
+				col = 0;
 			}
+			for (i = 0; i < clen; i++)
+				fprintf(outfile, "\\%03o",
+				    (int)(unsigned char)s[i]);
+			col += 4 * clen;
 		}
+		s += clen;
+		len -= clen;
 	}
-	(void)putchar('$');
-	(void)putchar('\n');
-	if (ferror(stdout))
-		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
+	if (col + 1 >= termwidth)
+		fprintf(outfile, "\\\n");
+	(void)fputc('$', outfile);
+	(void)fputc('\n', outfile);
+	if (ferror(outfile))
+		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
 }
 
-static inline int
-regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t slen)
+static __inline int
+regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
+	size_t slen)
 {
 	int eval;
 #ifndef REG_STARTEND
 	char *buf;
 #endif
-	
+
 	if (preg == NULL) {
 		if (defpreg == NULL)
-			err(FATAL, "first RE may not be empty");
+			errx(1, "first RE may not be empty");
 	} else
 		defpreg = preg;
 
-	/* Set anchors, discounting trailing newline (if any). */
-	if (slen > 0 && string[slen - 1] == '\n')
-		slen--;
-
+	/* Set anchors */
 #ifndef REG_STARTEND
-	if ((buf = malloc(slen + 1)) == NULL)
-		err(1, NULL);
+	buf = xmalloc(slen + 1);
 	(void)memcpy(buf, string, slen);
 	buf[slen] = '\0';
 	eval = regexec(defpreg, buf,
@@ -577,7 +674,7 @@ regexec_e(regex_t *preg, const char *str
 	free(buf);
 #else
 	match[0].rm_so = 0;
-	match[0].rm_eo = slen;
+	match[0].rm_eo = (regoff_t)slen;
 	eval = regexec(defpreg, string,
 	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
 #endif
@@ -587,9 +684,8 @@ regexec_e(regex_t *preg, const char *str
 	case REG_NOMATCH:
 		return (0);
 	}
-	err(FATAL, "RE error: %s", strregerror(eval, defpreg));
+	errx(1, "RE error: %s", strregerror(eval, defpreg));
 	/* NOTREACHED */
-	return (0);
 }
 
 /*
@@ -599,14 +695,15 @@ regexec_e(regex_t *preg, const char *str
 static void
 regsub(SPACE *sp, char *string, char *src)
 {
-	int len, no;
+	size_t len;
+	int no;
 	char c, *dst;
 
 #define	NEEDSP(reqlen)							\
+	/* XXX What is the +1 for? */					\
 	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
-		size_t newlen = sp->blen + (reqlen) + 1024;		\
-		sp->space = sp->back = xrealloc(sp->back, newlen);	\
-		sp->blen = newlen;					\
+		sp->blen += (reqlen) + 1024;				\
+		sp->space = sp->back = xrealloc(sp->back, sp->blen);	\
 		dst = sp->space + sp->len;				\
 	}
 
@@ -619,13 +716,13 @@ regsub(SPACE *sp, char *string, char *sr
 		else
 			no = -1;
 		if (no < 0) {		/* Ordinary character. */
- 			if (c == '\\' && (*src == '\\' || *src == '&'))
- 				c = *src++;
+			if (c == '\\' && (*src == '\\' || *src == '&'))
+				c = *src++;
 			NEEDSP(1);
- 			*dst++ = c;
+			*dst++ = c;
 			++sp->len;
- 		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
-			len = match[no].rm_eo - match[no].rm_so;
+		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
+			len = (size_t)(match[no].rm_eo - match[no].rm_so);
 			NEEDSP(len);
 			memmove(dst, string + match[no].rm_so, len);
 			dst += len;
@@ -637,9 +734,9 @@ regsub(SPACE *sp, char *string, char *sr
 }
 
 /*
- * aspace --
- *	Append the source space to the destination space, allocating new
- *	space as necessary.
+ * cspace --
+ *	Concatenate space: append the source space to the destination space,
+ *	allocating new space as necessary.
  */
 void
 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
@@ -649,9 +746,8 @@ cspace(SPACE *sp, const char *p, size_t 
 	/* Make sure SPACE has enough memory and ramp up quickly. */
 	tlen = sp->len + len + 1;
 	if (tlen > sp->blen) {
-		size_t newlen = tlen + 1024;
-		sp->space = sp->back = xrealloc(sp->back, newlen);
-		sp->blen = newlen;
+		sp->blen = tlen + 1024;
+		sp->space = sp->back = xrealloc(sp->back, sp->blen);
 	}
 
 	if (spflag == REPLACE)
@@ -673,13 +769,12 @@ cfclose(struct s_command *cp, struct s_c
 		switch(cp->code) {
 		case 's':
 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
-				err(FATAL,
-				    "%s: %s", cp->u.s->wfile, strerror(errno));
+				err(1, "%s", cp->u.s->wfile);
 			cp->u.s->wfd = -1;
 			break;
 		case 'w':
 			if (cp->u.fd != -1 && close(cp->u.fd))
-				err(FATAL, "%s: %s", cp->t, strerror(errno));
+				err(1, "%s", cp->t);
 			cp->u.fd = -1;
 			break;
 		case '{':

Index: src/usr.bin/sed/sed.1
diff -u src/usr.bin/sed/sed.1:1.32 src/usr.bin/sed/sed.1:1.32.4.1
--- src/usr.bin/sed/sed.1:1.32	Wed May 29 15:05:43 2013
+++ src/usr.bin/sed/sed.1	Sun Aug 10 06:58:55 2014
@@ -1,5 +1,4 @@
-.\"	$NetBSD: sed.1,v 1.32 2013/05/29 15:05:43 wiz Exp $
-.\"
+.\"	$NetBSD: sed.1,v 1.32.4.1 2014/08/10 06:58:55 tls Exp $
 .\" Copyright (c) 1992, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
@@ -31,8 +30,9 @@
 .\" SUCH DAMAGE.
 .\"
 .\"	@(#)sed.1	8.2 (Berkeley) 12/30/93
+.\" $FreeBSD: head/usr.bin/sed/sed.1 259132 2013-12-09 18:57:20Z eadler $
 .\"
-.Dd May 29, 2013
+.Dd June 18, 2014
 .Dt SED 1
 .Os
 .Sh NAME
@@ -40,14 +40,16 @@
 .Nd stream editor
 .Sh SYNOPSIS
 .Nm
-.Op Fl aEnr
+.Op Fl aElnru
 .Ar command
-.Op Ar file ...
+.Op Ar
 .Nm
-.Op Fl aEnr
+.Op Fl aElnru
 .Op Fl e Ar command
 .Op Fl f Ar command_file
-.Op Ar file ...
+.Op Fl I Ns Op Ar extension
+.Op Fl i Ns Op Ar extension
+.Op Ar
 .Sh DESCRIPTION
 The
 .Nm
@@ -80,8 +82,11 @@ to delay opening each file until a comma
 .Dq w
 function is applied to a line of input.
 .It Fl E
-Enables the use of extended regular expressions instead of the
-usual basic regular expression syntax.
+Interpret regular expressions as extended (modern) regular expressions
+rather than basic regular expressions (BRE's).
+The
+.Xr re_format 7
+manual page fully describes both formats.
 .It Fl e Ar command
 Append the editing commands specified by the
 .Ar command
@@ -92,6 +97,48 @@ Append the editing commands found in the
 .Ar command_file
 to the list of commands.
 The editing commands should each be listed on a separate line.
+.It Fl I Ns Op Ar extension
+Edit files in-place, saving backups with the specified
+.Ar extension .
+If no
+.Ar extension
+is given, no backup will be saved.
+It is not recommended to give a zero-length
+.Ar extension
+when in-place editing files, as you risk corruption or partial content
+in situations where disk space is exhausted, etc.
+.Pp
+Note that in-place editing with
+.Fl I
+still takes place in a single continuous line address space covering
+all files, although each file preserves its individuality instead of
+forming one output stream.
+The line counter is never reset between files, address ranges can span
+file boundaries, and the
+.Dq $
+address matches only the last line of the last file.
+(See
+.Sx "Sed Addresses" . )
+That can lead to unexpected results in many cases of in-place editing,
+where using
+.Fl i
+is desired.
+.It Fl i Ns Op Ar extension
+Edit files in-place similarly to
+.Fl I ,
+but treat each file independently from other files.
+In particular, line numbers in each file start at 1,
+the
+.Dq $
+address matches the last line of the current file,
+and address ranges are limited to the current file.
+(See
+.Sx "Sed Addresses" . )
+The net result is as though each file were edited by a separate
+.Nm
+instance.
+.It Fl l
+Make output line buffered.
 .It Fl n
 By default, each line of input is echoed to the standard output after
 all of the commands have been applied to it.
@@ -99,17 +146,19 @@ The
 .Fl n
 option suppresses this behavior.
 .It Fl r
-Identical to
-.Fl E ,
-present for compatibility with GNU sed.
+Same as
+.Fl E
+for compatibility with GNU sed.
+.It Fl u
+Make output unbuffered.
 .El
 .Pp
 The form of a
 .Nm
 command is as follows:
-.sp
+.Pp
 .Dl [address[,address]]function[arguments]
-.sp
+.Pp
 Whitespace may be inserted before the first address and the function
 portions of the command.
 .Pp
@@ -128,28 +177,58 @@ deletes the pattern space.
 Some of the functions use a
 .Em "hold space"
 to save all or part of the pattern space for subsequent retrieval.
-.Sh SED ADDRESSES
-An address is not required, but if specified must be a number (that counts
+.Ss "Sed Addresses"
+An address is not required, but if specified must have one of the
+following formats:
+.Bl -bullet -offset indent
+.It
+a number that counts
 input lines
-cumulatively across input files), a dollar
-.Po
-.Dq $
-.Pc
-character that addresses the last line of input, or a context address
-(which consists of a regular expression preceded and followed by a
-delimiter).
+cumulatively across input files (or in each file independently
+if a
+.Fl i
+option is in effect);
+.It
+a dollar
+.Pq Dq $
+character that addresses the last line of input (or the last line
+of the current file if a
+.Fl i
+option was specified);
+.It
+a context address
+that consists of a regular expression preceded and followed by a
+delimiter.
+The closing delimiter can also optionally be followed by the
+.Dq i
+character, to indicate that the regular expression is to be matched
+in a case-insensitive way.
+.El
 .Pp
 A command line with no addresses selects every pattern space.
 .Pp
 A command line with one address selects all of the pattern spaces
 that match the address.
 .Pp
-A command line with two addresses selects the inclusive range from
-the first pattern space that matches the first address through the next
-pattern space that matches the second.
-(If the second address is a number less than or equal to the line number
-first selected, only that line is selected.)
-Starting at the first line following the selected range,
+A command line with two addresses selects an inclusive range.
+This
+range starts with the first pattern space that matches the first
+address.
+The end of the range is the next following pattern space
+that matches the second address.
+If the second address is a number
+less than or equal to the line number first selected, only that
+line is selected.
+The number in the second address may be prefixed with a
+.Pq Dq \&+
+to specify the number of lines to match after the first pattern.
+In the case when the second address is a context
+address,
+.Nm
+does not re-match the second address against the
+pattern space that matched the first address.
+Starting at the
+first line following the selected range,
 .Nm
 starts looking again for the first address.
 .Pp
@@ -157,38 +236,45 @@ Editing commands can be applied to non-s
 of the exclamation character
 .Pq Dq \&!
 function.
-.Sh SED REGULAR EXPRESSIONS
-The
-.Nm
-regular expressions are basic regular expressions (BRE's, see
+.Ss "Sed Regular Expressions"
+The regular expressions used in
+.Nm ,
+by default, are basic regular expressions (BREs, see
 .Xr re_format 7
-for more information).
+for more information), but extended (modern) regular expressions can be used
+instead if the
+.Fl E
+flag is given.
 In addition,
 .Nm
-has the following two additions to BRE's:
-.sp
+has the following two additions to regular expressions:
+.Pp
 .Bl -enum -compact
 .It
 In a context address, any character other than a backslash
-.Po
-.Dq \e
-.Pc
-or newline character may be used to delimit the regular expression
-by prefixing the first use of that delimiter with a backslash.
+.Pq Dq \e
+or newline character may be used to delimit the regular expression.
+The opening delimiter needs to be preceded by a backslash
+unless it is a slash.
+For example, the context address
+.Li \exabcx
+is equivalent to
+.Li /abc/ .
 Also, putting a backslash character before the delimiting character
-causes the character to be treated literally.
-For example, in the context address \exabc\exdefx, the RE delimiter
-is an
+within the regular expression causes the character to be treated literally.
+For example, in the context address
+.Li \exabc\exdefx ,
+the RE delimiter is an
 .Dq x
 and the second
 .Dq x
 stands for itself, so that the regular expression is
 .Dq abcxdef .
-.sp
+.Pp
 .It
 The escape sequence \en matches a newline character embedded in the
 pattern space.
-You can't, however, use a literal newline character in an address or
+You cannot, however, use a literal newline character in an address or
 in the substitute command.
 .El
 .Pp
@@ -196,8 +282,8 @@ One special feature of
 .Nm
 regular expressions is that they can default to the last regular
 expression used.
-If a regular expression is empty, with nothing between the delimiter
-characters, the last regular expression encountered is used instead.
+If a regular expression is empty, i.e., just the delimiter characters
+are specified, the last regular expression encountered is used instead.
 The last regular expression is defined as the last regular expression
 used as part of an address or substitute command, and at run-time, not
 compile-time.
@@ -207,7 +293,7 @@ will substitute
 .Dq XXX
 for the pattern
 .Dq abc .
-.Sh SED FUNCTIONS
+.Ss "Sed Functions"
 In the following list of commands, the maximum number of permissible
 addresses for each command is indicated by [0addr], [1addr], or [2addr],
 representing zero, one, or two addresses.
@@ -261,73 +347,69 @@ can be preceded by white space and can b
 The function can be preceded by white space.
 The terminating
 .Dq }
-must be preceded by a newline (and optionally white space).
-.sp
+must be preceded by a newline, and may also be preceded by white space.
+.Pp
 .Bl -tag -width "XXXXXX" -compact
 .It [2addr] function-list
 Execute function-list only when the pattern space is selected.
-.sp
+.Pp
 .It [1addr]a\e
 .It text
-.br
 Write
 .Em text
 to standard output immediately before each attempt to read a line of input,
 whether by executing the
 .Dq N
 function or by beginning a new cycle.
-.sp
+.Pp
 .It [2addr]b[label]
 Branch to the
 .Dq \&:
 function with the specified label.
 If the label is not specified, branch to the end of the script.
-.sp
+.Pp
 .It [2addr]c\e
 .It text
-.br
 Delete the pattern space.
 With 0 or 1 address or at the end of a 2-address range,
 .Em text
 is written to the standard output.
-Start the next cycle.
-.sp
+.Pp
 .It [2addr]d
 Delete the pattern space and start the next cycle.
-.sp
+.Pp
 .It [2addr]D
 Delete the initial segment of the pattern space through the first
 newline character and start the next cycle.
-.sp
+.Pp
 .It [2addr]g
 Replace the contents of the pattern space with the contents of the
 hold space.
-.sp
+.Pp
 .It [2addr]G
 Append a newline character followed by the contents of the hold space
 to the pattern space.
-.sp
+.Pp
 .It [2addr]h
 Replace the contents of the hold space with the contents of the
 pattern space.
-.sp
+.Pp
 .It [2addr]H
 Append a newline character followed by the contents of the pattern space
 to the hold space.
-.sp
+.Pp
 .It [1addr]i\e
 .It text
-.br
 Write
 .Em text
 to the standard output.
-.sp
+.Pp
 .It [2addr]l
 (The letter ell.)
 Write the pattern space to the standard output in a visually unambiguous
 form.
 This form is as follows:
-.sp
+.Pp
 .Bl -tag -width "carriage-returnXX" -offset indent -compact
 .It backslash
 \e\e
@@ -335,8 +417,6 @@ This form is as follows:
 \ea
 .It form-feed
 \ef
-.It newline
-\en
 .It carriage-return
 \er
 .It tab
@@ -352,28 +432,28 @@ Long lines are folded, with the point of
 a backslash followed by a newline.
 The end of each line is marked with a
 .Dq $ .
-.sp
+.Pp
 .It [2addr]n
 Write the pattern space to the standard output if the default output has
 not been suppressed, and replace the pattern space with the next line of
-input. (Does not begin a new cycle.)
-.sp
+input.
+.Pp
 .It [2addr]N
 Append the next line of input to the pattern space, using an embedded
 newline character to separate the appended material from the original
 contents.
 Note that the current line number changes.
-.sp
+.Pp
 .It [2addr]p
 Write the pattern space to standard output.
-.sp
+.Pp
 .It [2addr]P
 Write the pattern space, up to the first newline character to the
 standard output.
-.sp
+.Pp
 .It [1addr]q
 Branch to the end of the script and quit without starting a new cycle.
-.sp
+.Pp
 .It [1addr]r file
 Copy the contents of
 .Em file
@@ -383,7 +463,7 @@ If
 .Em file
 cannot be read for any reason, it is silently ignored and no error
 condition is set.
-.sp
+.Pp
 .It [2addr]s/regular expression/replacement/flags
 Substitute the replacement string for the first instance of the regular
 expression in the pattern space.
@@ -393,12 +473,10 @@ Within the RE and the replacement, the R
 a literal character if it is preceded by a backslash.
 .Pp
 An ampersand
-.Po
-.Dq \*[Am]
-.Pc
+.Pq Dq &
 appearing in the replacement is replaced by the string matching the RE.
 The special meaning of
-.Dq \*[Am]
+.Dq &
 in this context can be suppressed by preceding it by a backslash.
 The string
 .Dq \e# ,
@@ -416,9 +494,10 @@ The value of
 .Em flags
 in the substitute function is zero or more of the following:
 .Bl -tag -width "XXXXXX" -offset indent
-.It "0 ... 9"
-Make the substitution only for the N'th occurrence of the regular
-expression in the pattern space.
+.It Ar N
+Make the substitution only for the
+.Ar N Ns 'th
+occurrence of the regular expression in the pattern space.
 .It g
 Make the substitution for all non-overlapping matches of the
 regular expression, not just the first one.
@@ -432,8 +511,10 @@ Append the pattern space to
 if a replacement was made.
 If the replacement string is identical to that which it replaces, it
 is still considered to have been a replacement.
+.It i or I
+Match the regular expression in a case-insensitive way.
 .El
-.sp
+.Pp
 .It [2addr]t [label]
 Branch to the
 .Dq \&:
@@ -442,14 +523,14 @@ most recent reading of an input line or 
 .Dq t
 function.
 If no label is specified, branch to the end of the script.
-.sp
+.Pp
 .It [2addr]w Em file
 Append the pattern space to the
 .Em file .
-.sp
+.Pp
 .It [2addr]x
 Swap the contents of the pattern and hold spaces.
-.sp
+.Pp
 .It [2addr]y/string1/string2/
 Replace all occurrences of characters in
 .Em string1
@@ -464,27 +545,27 @@ and
 a backslash followed by any character other than a newline is that literal
 character, and a backslash followed by an ``n'' is replaced by a newline
 character.
-.sp
+.Pp
 .It [2addr]!function
 .It [2addr]!function-list
 Apply the function or function-list only to the lines that are
 .Em not
 selected by the address(es).
-.sp
+.Pp
 .It [0addr]:label
 This function does nothing; it bears a label to which the
 .Dq b
 and
 .Dq t
 commands may branch.
-.sp
+.Pp
 .It [1addr]=
 Write the line number to the standard output followed by a newline
 character.
-.sp
+.Pp
 .It [0addr]
 Empty lines are ignored.
-.sp
+.Pp
 .It [0addr]#
 The
 .Dq #
@@ -496,25 +577,64 @@ This is the same as specifying the
 .Fl n
 option on the command line.
 .El
-.Pp
+.Sh ENVIRONMENT
 The
+.Ev COLUMNS , LANG , LC_ALL , LC_CTYPE
+and
+.Ev LC_COLLATE
+environment variables affect the execution of
 .Nm
-utility exits 0 on success and \*[Gt]0 if an error occurs.
+as described in
+.Xr environ 7 .
+.Sh EXIT STATUS
+.Ex -std
 .Sh SEE ALSO
 .Xr awk 1 ,
 .Xr ed 1 ,
 .Xr grep 1 ,
-.Xr tr 1 ,
 .Xr regex 3 ,
 .Xr re_format 7
 .Sh STANDARDS
 The
 .Nm
-function is expected to be a superset of the
+utility is expected to be a superset of the
 .St -p1003.2
 specification.
+.Pp
+The
+.Fl a , E , I ,
+and
+.Fl i
+options, the prefixing
+.Dq \&+
+in the second member of an address range,
+as well as the
+.Dq I
+flag to the address regular expression and substitution command are
+non-standard
+.Fx
+extensions and may not be available on other operating systems.
 .Sh HISTORY
 A
 .Nm
-command appeared in
+command, written by
+.An L. E. McMahon ,
+appeared in
 .At v7 .
+.Sh AUTHORS
+.An "Diomidis D. Spinellis" Aq d...@freebsd.org
+.Sh BUGS
+Multibyte characters containing a byte with value 0x5C
+.Tn ( ASCII
+.Ql \e )
+may be incorrectly treated as line continuation characters in arguments to the
+.Dq a ,
+.Dq c
+and
+.Dq i
+commands.
+Multibyte characters cannot be used as delimiters with the
+.Dq s
+and
+.Dq y
+commands.

CVS commit: [tls-earlyentropy] src/usr.bin/sed

Reply via email to