I thought adding the 'x' flag to the 's' command would be a nice warm-up before trying to implement `tar --transform` ... but it turns out there's no such flag in GNU sed. We just misread the GNU tar manual (which never strictly claims that sed has this flag, but also doesn't explicitly call out that the flag isn't supported in sed).
I've attached the patch anyway, since I only worked this out after the fact, but I'm deliberately not sending the usual "[PATCH]" style mail, in order to call your attention to the fact that this would be a toybox extension if we added it to sed. -- [PATCH] sed: add 'x' flag to the 's' command. The GNU tar manual, when talking about the `tar --transform` option that I need to implement, describes the 'x' flag by saying "regexp is an extended regular expression (see section 'Extended regular expressions' in GNU sed)". Only it turns out that even the latest GNU sed doesn't actually have that flag. It's unique to `tar --transform`. That link is just telling you that the sed manual will explain extended regular expressions, not that GNU sed also supports the 'x' flag. So I don't know whether we want this in toybox sed after all. (It made sense that sed would have such a flag, but no sed that I know of actually does.) --- tests/sed.test | 2 ++ toys/posix/sed.c | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-)
From cd797812153da7bbca119b366623fd21df984c3b Mon Sep 17 00:00:00 2001 From: Elliott Hughes <e...@google.com> Date: Thu, 3 Dec 2020 16:38:26 -0800 Subject: [PATCH] sed: add 'x' flag to the 's' command. The GNU tar manual, when talking about the `tar --transform` option that I need to implement, describes the 'x' flag by saying "regexp is an extended regular expression (see section 'Extended regular expressions' in GNU sed)". Only it turns out that even the latest GNU sed doesn't actually have that flag. It's unique to `tar --transform`. That link is just telling you that the sed manual will explain extended regular expressions, not that GNU sed also supports the 'x' flag. So I don't know whether we want this in toybox sed after all. (It made sense that sed would have such a flag, but no sed that I know of actually does.) --- tests/sed.test | 2 ++ toys/posix/sed.c | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/sed.test b/tests/sed.test index beb11d5c..c3928e58 100755 --- a/tests/sed.test +++ b/tests/sed.test @@ -185,6 +185,8 @@ testing '\n with empty capture' \ testing '\n too high' \ 'sed -E "s/(.*)/\2/p" 2>/dev/null || echo OK' "OK\n" "" "foo" +toyonly testing 's///x' 'sed "s/(hello )?(world)/\2/x"' "world" "" "hello world" + # Performance test X=x; Y=20; while [ $Y -gt 0 ]; do X=$X$X; Y=$(($Y-1)); done testing 'megabyte s/x/y/g (20 sec timeout)' \ diff --git a/toys/posix/sed.c b/toys/posix/sed.c index 8fbef0cb..9bd05034 100644 --- a/toys/posix/sed.c +++ b/toys/posix/sed.c @@ -147,7 +147,7 @@ struct sedcmd { int rmatch[2]; // offset of regex struct for prefix matches (/abc/,/def/p) int arg1, arg2, w; // offset of two arguments per command, plus s//w filename unsigned not, hit; - unsigned sflags; // s///flag bits: i=1, g=2, p=4 + unsigned sflags; // s///flag bits: i=1, g=2, p=4, x=8 char c; // action }; @@ -441,7 +441,7 @@ static void sed_line(char **pline, long plen) } else zmatch = 0; // If we're replacing only a 
specific match, skip if this isn't it - off = command->sflags>>3; + off = command->sflags>>4; if (off && off != ++count) { memcpy(l2+l2used, rline, match[0].rm_eo); l2used += match[0].rm_eo; @@ -793,6 +793,7 @@ static void parse_pattern(char **pline, long len) if (!TT.nextlen--) break; } else if (c == 's') { char *end, delim = 0; + int flags; // s/pattern/replacement/flags @@ -845,19 +846,20 @@ resume_s: if (isspace(*line) && *line != '\n') continue; - if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l; + if (0 <= (l = stridx("igpx", *line))) command->sflags |= 1<<l; else if (*line == 'I') command->sflags |= 1<<0; - else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) { - command->sflags |= l << 3; + else if (!(command->sflags>>4) && 0<(l = strtol(line, &line, 10))) { + command->sflags |= l << 4; line--; } else break; } + flags = (FLAG(r) || (command->sflags&8)) ? REG_EXTENDED : 0; + if (command->sflags&1) flags |= REG_ICASE; // We deferred actually parsing the regex until we had the s///i flag // allocating the space was done by extend_string() above if (!*TT.remember) command->arg1 = 0; - else xregcomp((void *)(command->arg1 + (char *)command), TT.remember, - (REG_EXTENDED*!!FLAG(r))|((command->sflags&1)*REG_ICASE)); + else xregcomp((void *)(command->arg1+(char *)command),TT.remember,flags); free(TT.remember); TT.remember = 0; if (*line == 'w') { -- 2.29.2.576.ga3fc446d84-goog
_______________________________________________ Toybox mailing list Toybox@lists.landley.net http://lists.landley.net/listinfo.cgi/toybox-landley.net