On Sat, Sep 12, 2020 at 06:58:47AM +0000, avih wrote: > Note: originally posted to misc@ and was requested by Rafael Possamai > to post to bugs@ ( https://marc.info/?l=openbsd-misc&m=159986950219082&w=4 ) > > Note: I'm not following this list, nor using OpenBSD regularly. > Please CC me for any comment. > > > uname -a (running inside virtual box): > OpenBSD foo.my.domain 6.7 GENERIC.MP#182 amd64 > > > Run the following command: > /bin/sh -c 'IFS=; args() { printf "[%s]" "$@"; }; args 1 2 3' > > Expected result: > [1][2][3] > > Actual result: > [1 2 3] > > > It seems that "$@" in [k]sh with IFS as null combines the arguments > as if it was "$*" when IFS is unset or begins with space, while in > fact the main difference between (quoted) "$@" and $* is that the > former should retain the individual arguments regardless of IFS, > while $* combines them with the first char of IFS (or space if IFS > is unset).
Thanks for the report. This is known and a patch by Martijn Dekker from 2016 exists, but it never made it into the tree despite being tested in snapshots for a while. I don't know/remember the reason - I think it just stalled. Below is the final version of the patch, rebased against -current. Here's Martijn's original email (https://marc.info/?l=openbsd-tech&m=145704771329065&w=2) > Hi all, > > I'm new here and posting at Theo de Raadt's request. I'm developing a > general-purpose cross-platform library for the POSIX shell language and > in the process I encounter lots of bugs in various shells. I will be > posting here a few times with some patches and bug reports against > OpenBSD ksh. Here is the first. > > One uncommon but useful way of writing shell scripts is to start off by > disabling field/word splitting (IFS='') and pathname expansion/globbing > (set -f), re-enabling either or both only for the commands that need > them, e.g. within a subshell. This helps avoid a lot of snags with field > splitting and globbing if you forget to quote a variable somewhere, > adding to the general robustness of a script. (In fact it eliminates > much of the need to quote variable/parameter expansions, with empty > removal remaining as the only issue.) > > Unfortunately OpenBSD ksh (like all pdksh variants except mksh) has a > POSIX compliance bug that is a show stopper for this approach: "$@" does > not generate words (arguments) if IFS is empty. As a result, the > separate command arguments represented by "$@" become a single argument. > So passing on an intact set of positional parameters to a command or > function is impossible with field splitting disabled. > > Of course this is illogical: the quoted special parameter "$@" generates > zero or more words, it doesn't split any words, so the contents of IFS > (or lack thereof) should be neither here nor there. It's old ksh88 > behaviour copied by the original pdksh, but it violates POSIX and it has > been fixed many years ago in ksh93 and all other POSIX shells. > > As I mentioned, mksh fixed it too. I seem to have successfully > backported the mksh fix to OpenBSD ksh. The fix is not too complicated, > but not trivial either, so the MirOS licence would have applied, which I > understand may not be acceptable here. > > So I emailed the author, Thorsten Glaser, explaining the situation. > After some amicable discussion, he granted me a personal licence for > this particular patch, authorising me to sublicence it in whatever way I > please. (Proof is available on request.) > > Under that authorisation, I hereby dedicate the attached patch to the > public domain. In localities where that is not valid, I hereby grant > unlimited permission to use, copy, modify and distribute it, to the > extent permitted by law. > > So this patch makes quoted "$@" act according to the standard even when > IFS is empty. Quoted "$*" is unchanged. For the unspecified (not > standardised) cases of unquoted $@ and $*, this patch makes ksh act like > AT&T ksh93, bash, zsh and (d)ash, which seems safest from a > compatibility point of view. > > The patch is against the OpenBSD 5.8-RELEASE sources. I hope that's ok. > I also added an update for the relevant regression test. > > Thanks, > > - M. Index: bin/ksh/eval.c =================================================================== RCS file: /var/cvs/src/bin/ksh/eval.c,v retrieving revision 1.65 diff -u -p -r1.65 eval.c --- bin/ksh/eval.c 28 Jun 2019 13:34:59 -0000 1.65 +++ bin/ksh/eval.c 12 Sep 2020 08:29:56 -0000 @@ -47,6 +47,8 @@ typedef struct Expand { #define IFS_WORD 0 /* word has chars (or quotes) */ #define IFS_WS 1 /* have seen IFS white-space */ #define IFS_NWS 2 /* have seen IFS non-white-space */ +#define IFS_IWS 3 /* beginning of word, ignore IFS white-space */ +#define IFS_QUOTE 4 /* beg.w/quote, becomes IFS_WORD unless "$@" */ static int varsub(Expand *, char *, char *, int *, int *); static int comsub(Expand *, char *); @@ -217,7 +219,17 @@ expand(char *cp, /* input word */ c = *sp++; break; case OQUOTE: - word = IFS_WORD; + switch (word) { + case IFS_QUOTE: + /* """something */ + word = IFS_WORD; + break; + case IFS_WORD: + break; + default: + word = IFS_QUOTE; + break; + } tilde_ok = 0; quote = 1; continue; @@ -297,6 +309,8 @@ expand(char *cp, /* input word */ if (f&DOBLANK) doblank++; tilde_ok = 0; + if (word == IFS_QUOTE && type != XNULLSUB) + word = IFS_WORD; if (type == XBASE) { /* expand? */ if (!st->next) { SubType *newst; @@ -358,6 +372,11 @@ expand(char *cp, /* input word */ f |= DOTEMP_; /* FALLTHROUGH */ default: + /* '-' '+' '?' */ + if (quote) + word = IFS_WORD; + else if (dp == Xstring(ds, dp)) + word = IFS_IWS; /* Enable tilde expansion */ tilde_ok = 1; f |= DOTILDE; @@ -387,10 +406,17 @@ expand(char *cp, /* input word */ */ x.str = trimsub(str_val(st->var), dp, st->stype); - if (x.str[0] != '\0' || st->quote) + if (x.str[0] != '\0') { + word = IFS_IWS; type = XSUB; - else + } else if (quote) { + word = IFS_WORD; + type = XSUB; + } else { + if (dp == Xstring(ds, dp)) + word = IFS_IWS; type = XNULLSUB; + } if (f&DOBLANK) doblank++; st = st->prev; @@ -422,6 +448,10 @@ expand(char *cp, /* input word */ if (f&DOBLANK) doblank++; st = st->prev; + if (quote || !*x.str) + word = IFS_WORD; + else + word = IFS_IWS; continue; case '?': { @@ -463,12 +493,8 @@ expand(char *cp, /* input word */ type = XBASE; if (f&DOBLANK) { doblank--; - /* not really correct: x=; "$x$@" should - * generate a null argument and - * set A; "${@:+}" shouldn't. - */ - if (dp == Xstring(ds, dp)) - word = IFS_WS; + if (dp == Xstring(ds, dp) && word != IFS_WORD) + word = IFS_IWS; } continue; @@ -503,7 +529,12 @@ expand(char *cp, /* input word */ if (c == 0) { if (quote && !x.split) continue; + if (!quote && word == IFS_WS) + continue; + /* this is so we don't terminate */ c = ' '; + /* now force-emit a word */ + goto emit_word; } if (quote && x.split) { /* terminate word for "$@" */ @@ -554,15 +585,15 @@ expand(char *cp, /* input word */ * ----------------------------------- * IFS_WORD w/WS w/NWS w * IFS_WS -/WS w/NWS - - * IFS_NWS -/NWS w/NWS w + * IFS_NWS -/NWS w/NWS - + * IFS_IWS -/WS w/NWS - * (w means generate a word) - * Note that IFS_NWS/0 generates a word (at&t ksh - * doesn't do this, but POSIX does). */ - if (word == IFS_WORD || - (!ctype(c, C_IFSWS) && c && word == IFS_NWS)) { - char *p; - + if ((word == IFS_WORD) || (word == IFS_QUOTE) || (c && + (word == IFS_IWS || word == IFS_NWS) && + !ctype(c, C_IFSWS))) { + char *p; + emit_word: *dp++ = '\0'; p = Xclose(ds, dp); if (fdo & DOBRACE_) Index: regress/bin/ksh/ifs.t =================================================================== RCS file: /var/cvs/src/regress/bin/ksh/ifs.t,v retrieving revision 1.1 diff -u -p -r1.1 ifs.t --- regress/bin/ksh/ifs.t 2 Dec 2013 20:39:44 -0000 1.1 +++ regress/bin/ksh/ifs.t 12 Sep 2020 08:29:56 -0000 @@ -45,10 +45,10 @@ stdin: showargs 3 $@ showargs 4 "$@" expected-stdout: - <1> <A B C> + <1> <A> <B> <C> <2> <ABC> - <3> <A B C> - <4> <A B C> + <3> <A> <B> <C> + <4> <A> <B> <C> --- name: IFS-space-colon-1