i thought adding the 's' command's 'x' flag would be a nice warm up to
trying to implement `tar --transform` ... but it turns out there's no such
flag in GNU sed. we just misread the GNU tar manual (which never strictly
claims that sed has this flag, but also doesn't explicitly call out that
the flag isn't supported in sed).

i've attached the patch anyway, since i only worked this out after
the fact, but i'm deliberately not sending the usual "[PATCH]" style mail,
so as to call your attention to the fact that this would be a toybox
extension if we added it to sed.

 --

[PATCH] sed: add 'x' flag to the 's' command.

The GNU tar manual, when talking about the `tar --transform` option that I
need to implement, describes the 'x' flag by saying "regexp is an extended
regular expression (see section 'Extended regular expressions' in GNU sed)".

Only it turns out that even the latest GNU sed doesn't actually have
that flag. It's unique to `tar --transform`. That link is just telling
you that the sed manual will explain extended regular expressions, not
that GNU sed also supports the 'x' flag.

So I don't know whether we want this in toybox sed after all. (It made
sense that sed would have such a flag, but no sed that I know of
actually does.)
---
 tests/sed.test   |  2 ++
 toys/posix/sed.c | 16 +++++++++-------
 2 files changed, 11 insertions(+), 7 deletions(-)
From cd797812153da7bbca119b366623fd21df984c3b Mon Sep 17 00:00:00 2001
From: Elliott Hughes <e...@google.com>
Date: Thu, 3 Dec 2020 16:38:26 -0800
Subject: [PATCH] sed: add 'x' flag to the 's' command.

The GNU tar manual, when talking about the `tar --transform` option that I
need to implement, describes the 'x' flag by saying "regexp is an extended
regular expression (see section 'Extended regular expressions' in GNU sed)".

Only it turns out that even the latest GNU sed doesn't actually have
that flag. It's unique to `tar --transform`. That link is just telling
you that the sed manual will explain extended regular expressions, not
that GNU sed also supports the 'x' flag.

So I don't know whether we want this in toybox sed after all. (It made
sense that sed would have such a flag, but no sed that I know of
actually does.)
---
 tests/sed.test   |  2 ++
 toys/posix/sed.c | 16 +++++++++-------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/tests/sed.test b/tests/sed.test
index beb11d5c..c3928e58 100755
--- a/tests/sed.test
+++ b/tests/sed.test
@@ -185,6 +185,8 @@ testing '\n with empty capture' \
 testing '\n too high' \
     'sed -E "s/(.*)/\2/p" 2>/dev/null || echo OK' "OK\n" "" "foo"
 
+toyonly testing 's///x' 'sed "s/(hello )?(world)/\2/x"' "world" "" "hello world"
+
 # Performance test
 X=x; Y=20; while [ $Y -gt 0 ]; do X=$X$X; Y=$(($Y-1)); done
 testing 'megabyte s/x/y/g (20 sec timeout)' \
diff --git a/toys/posix/sed.c b/toys/posix/sed.c
index 8fbef0cb..9bd05034 100644
--- a/toys/posix/sed.c
+++ b/toys/posix/sed.c
@@ -147,7 +147,7 @@ struct sedcmd {
   int rmatch[2];  // offset of regex struct for prefix matches (/abc/,/def/p)
   int arg1, arg2, w; // offset of two arguments per command, plus s//w filename
   unsigned not, hit;
-  unsigned sflags; // s///flag bits: i=1, g=2, p=4
+  unsigned sflags; // s///flag bits: i=1, g=2, p=4, x=8
   char c; // action
 };
 
@@ -441,7 +441,7 @@ static void sed_line(char **pline, long plen)
         } else zmatch = 0;
 
         // If we're replacing only a specific match, skip if this isn't it
-        off = command->sflags>>3;
+        off = command->sflags>>4;
         if (off && off != ++count) {
           memcpy(l2+l2used, rline, match[0].rm_eo);
           l2used += match[0].rm_eo;
@@ -793,6 +793,7 @@ static void parse_pattern(char **pline, long len)
       if (!TT.nextlen--) break;
     } else if (c == 's') {
       char *end, delim = 0;
+      int flags;
 
       // s/pattern/replacement/flags
 
@@ -845,19 +846,20 @@ resume_s:
 
         if (isspace(*line) && *line != '\n') continue;
 
-        if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l;
+        if (0 <= (l = stridx("igpx", *line))) command->sflags |= 1<<l;
         else if (*line == 'I') command->sflags |= 1<<0;
-        else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
-          command->sflags |= l << 3;
+        else if (!(command->sflags>>4) && 0<(l = strtol(line, &line, 10))) {
+          command->sflags |= l << 4;
           line--;
         } else break;
       }
+      flags = (FLAG(r) || (command->sflags&8)) ? REG_EXTENDED : 0;
+      if (command->sflags&1) flags |= REG_ICASE;
 
       // We deferred actually parsing the regex until we had the s///i flag
       // allocating the space was done by extend_string() above
       if (!*TT.remember) command->arg1 = 0;
-      else xregcomp((void *)(command->arg1 + (char *)command), TT.remember,
-        (REG_EXTENDED*!!FLAG(r))|((command->sflags&1)*REG_ICASE));
+      else xregcomp((void *)(command->arg1+(char *)command),TT.remember,flags);
       free(TT.remember);
       TT.remember = 0;
       if (*line == 'w') {
-- 
2.29.2.576.ga3fc446d84-goog

_______________________________________________
Toybox mailing list
Toybox@lists.landley.net
http://lists.landley.net/listinfo.cgi/toybox-landley.net

Reply via email to