On Mon, Oct 26, 2020 at 05:39:30PM +0700, Robert Elz wrote:
 >   | Also if inserting newlines is an intended use case, I kinda think it
 >   | ought to accept \n in there, which it currently doesn't.
 > 
 > That would be "C string quoting" which is $'\n' which isn't yet in POSIX
 > but should be coming sometime, it is supported by most (Bournish) shells

I was thinking of handling it in lam itself, as in the patch below
(after all, sed has something similar).
I made it specifically recognize only \n and \t (and not \\ or
anything else) to minimize the chances of it breaking anything.


Index: lam.1
===================================================================
RCS file: /cvsroot/src/usr.bin/lam/lam.1,v
retrieving revision 1.14
diff -u -r1.14 lam.1
--- lam.1       26 Oct 2020 04:09:32 -0000      1.14
+++ lam.1       26 Oct 2020 04:12:48 -0000
@@ -91,6 +91,9 @@
 This option may appear after the last file.
 (A capitalized version appearing before the last file is not carried
 over past the last file.)
+Instances of the sequences "\en" and "\et" are replaced with hard
+newlines and tabs, respectively.
+(No other escapes are recognized.)
 .It Fl t Ar c
 The input line terminator is
 .Ar c
Index: lam.c
===================================================================
RCS file: /cvsroot/src/usr.bin/lam/lam.c,v
retrieving revision 1.8
diff -u -r1.8 lam.c
--- lam.c       4 Sep 2011 20:28:09 -0000       1.8
+++ lam.c       26 Oct 2020 04:12:48 -0000
@@ -74,6 +74,7 @@
 static char    *gatherline(struct openfile *);
 static void     getargs(char *[]);
 static char    *pad(struct openfile *);
+static const char *getseparator(const char *arg);
 
 int
 main(int argc, char *argv[])
@@ -128,7 +129,7 @@
                switch (tolower((unsigned char) *c)) {
                case 's':
                        if (*++p || (p = *++av))
-                               ip->sepstring = p;
+                               ip->sepstring = getseparator(p);
                        else
                                error("Need string after -%s", c);
                        S = (*c == 'S' ? 1 : 0);
@@ -216,6 +217,50 @@
        return (lp);
 }
 
+/*
+ * Translate the escapes \n and \t in a separator argument into a hard
+ * newline and a hard tab.  Only those two sequences are recognized;
+ * every other character, including any other backslash sequence, is
+ * copied through unchanged so existing separators keep working.
+ * Returns arg itself when it contains neither escape; otherwise a
+ * freshly malloc'd string.  Exits via err(1, ...) if malloc fails.
+ */
+static const char *
+getseparator(const char *arg)
+{
+       size_t i, j;
+       char *p;
+
+       /* Nothing to translate: hand the argument back untouched. */
+       if (strstr(arg, "\\n") == NULL && strstr(arg, "\\t") == NULL) {
+               return arg;
+       }
+
+       /*
+        * Each escape shrinks the string by one byte and at least one
+        * is present, so the result plus its NUL fits in strlen(arg).
+        */
+       p = malloc(strlen(arg));
+       if (p == NULL) {
+               err(1, "malloc");
+       }
+       for (i = j = 0; arg[i] != '\0'; i++) {
+               if (arg[i] == '\\' && arg[i+1] == 'n') {
+                       p[j++] = '\n';
+                       /* also consume the 'n' */
+                       i++;
+               } else if (arg[i] == '\\' && arg[i+1] == 't') {
+                       p[j++] = '\t';
+                       /* also consume the 't' */
+                       i++;
+               } else {
+                       p[j++] = arg[i];
+               }
+       }
+       p[j] = '\0';
+       return p;
+}
+
 static void
 error(const char *msg, const char *s)
 {


-- 
David A. Holland
dholl...@netbsd.org

Reply via email to