Hi all, I'm new here and posting at Theo de Raadt's request. I'm developing a general-purpose cross-platform library for the POSIX shell language and in the process I encounter lots of bugs in various shells. I will be posting here a few times with some patches and bug reports against OpenBSD ksh. Here is the first.
One uncommon but useful way of writing shell scripts is to start off by disabling field/word splitting (IFS='') and pathname expansion/globbing (set -f), re-enabling either or both only for the commands that need them, e.g. within a subshell. This helps avoid a lot of snags with field splitting and globbing if you forget to quote a variable somewhere, adding to the general robustness of a script. (In fact it eliminates much of the need to quote variable/parameter expansions, with empty removal remaining as the only issue.) Unfortunately OpenBSD ksh (like all pdksh variants except mksh) has a POSIX compliance bug that is a show stopper for this approach: "$@" does not generate words (arguments) if IFS is empty. As a result, the separate command arguments represented by "$@" become a single argument. So passing on an intact set of positional parameters to a command or function is impossible with field splitting disabled. Of course this is illogical: the quoted special parameter "$@" generates zero or more words, it doesn't split any words, so the contents of IFS (or lack thereof) should be neither here nor there. It's old ksh88 behaviour copied by the original pdksh, but it violates POSIX and it has been fixed many years ago in ksh93 and all other POSIX shells. As I mentioned, mksh fixed it too. I seem to have successfully backported the mksh fix to OpenBSD ksh. The fix is not too complicated, but not trivial either, so the MirOS licence would have applied, which I understand may not be acceptable here. So I emailed the author, Thorsten Glaser, explaining the situation. After some amicable discussion, he granted me a personal licence for this particular patch, authorising me to sublicence it in whatever way I please. (Proof is available on request.) Under that authorisation, I hereby dedicate the attached patch to the public domain. In localities where that is not valid, I hereby grant unlimited permission to use, copy, modify and distribute it, to the extent permitted by law. So this patch makes quoted "$@" act according to the standard even when IFS is empty. Quoted "$*" is unchanged. For the unspecified (not standardised) cases of unquoted $@ and $*, this patch makes ksh act like AT&T ksh93, bash, zsh and (d)ash, which seems safest from a compatibility point of view. The patch is against the OpenBSD 5.8-RELEASE sources. I hope that's ok. I also added an update for the relevant regression test. Thanks, - M.
Index: bin/ksh/eval.c =================================================================== RCS file: /cvs/src/bin/ksh/eval.c,v retrieving revision 1.40 diff -u -p -u -r1.40 eval.c --- bin/ksh/eval.c 14 Sep 2013 20:09:30 -0000 1.40 +++ bin/ksh/eval.c 3 Mar 2016 23:03:10 -0000 @@ -40,6 +40,8 @@ typedef struct Expand { #define IFS_WORD 0 /* word has chars (or quotes) */ #define IFS_WS 1 /* have seen IFS white-space */ #define IFS_NWS 2 /* have seen IFS non-white-space */ +#define IFS_IWS 3 /* beginning of word, ignore IFS white-space */ +#define IFS_QUOTE 4 /* beg.w/quote, becomes IFS_WORD unless "$@" */ static int varsub(Expand *, char *, char *, int *, int *); static int comsub(Expand *, char *); @@ -208,7 +210,17 @@ expand(char *cp, /* input word */ c = *sp++; break; case OQUOTE: - word = IFS_WORD; + switch (word) { + case IFS_QUOTE: + /* """something */ + word = IFS_WORD; + break; + case IFS_WORD: + break; + default: + word = IFS_QUOTE; + break; + } tilde_ok = 0; quote = 1; continue; @@ -288,6 +300,8 @@ expand(char *cp, /* input word */ if (f&DOBLANK) doblank++; tilde_ok = 0; + if (word == IFS_QUOTE && type != XNULLSUB) + word = IFS_WORD; if (type == XBASE) { /* expand? */ if (!st->next) { SubType *newst; @@ -349,6 +363,11 @@ expand(char *cp, /* input word */ f |= DOTEMP_; /* FALLTHROUGH */ default: + /* '-' '+' '?' */ + if (quote) + word = IFS_WORD; + else if (dp == Xstring(ds, dp)) + word = IFS_IWS; /* Enable tilde expansion */ tilde_ok = 1; f |= DOTILDE; @@ -378,10 +397,17 @@ expand(char *cp, /* input word */ */ x.str = trimsub(str_val(st->var), dp, st->stype); - if (x.str[0] != '\0' || st->quote) + if (x.str[0] != '\0') { + word = IFS_IWS; + type = XSUB; + } else if (quote) { + word = IFS_WORD; type = XSUB; - else + } else { + if (dp == Xstring(ds, dp)) + word = IFS_IWS; type = XNULLSUB; + } if (f&DOBLANK) doblank++; st = st->prev; @@ -413,6 +439,7 @@ expand(char *cp, /* input word */ if (f&DOBLANK) doblank++; st = st->prev; + word = quote || (!*x.str) ? IFS_WORD : IFS_IWS; continue; case '?': { @@ -454,12 +481,8 @@ expand(char *cp, /* input word */ type = XBASE; if (f&DOBLANK) { doblank--; - /* not really correct: x=; "$x$@" should - * generate a null argument and - * set A; "${@:+}" shouldn't. - */ - if (dp == Xstring(ds, dp)) - word = IFS_WS; + if (dp == Xstring(ds, dp) && word != IFS_WORD) + word = IFS_IWS; } continue; @@ -494,7 +517,12 @@ expand(char *cp, /* input word */ if (c == 0) { if (quote && !x.split) continue; + if (!quote && word == IFS_WS) + continue; + /* this is so we don't terminate */ c = ' '; + /* now force-emit a word */ + goto emit_word; } if (quote && x.split) { /* terminate word for "$@" */ @@ -545,15 +573,15 @@ expand(char *cp, /* input word */ * ----------------------------------- * IFS_WORD w/WS w/NWS w * IFS_WS -/WS w/NWS - - * IFS_NWS -/NWS w/NWS w + * IFS_NWS -/NWS w/NWS - + * IFS_IWS -/WS w/NWS - * (w means generate a word) - * Note that IFS_NWS/0 generates a word (at&t ksh - * doesn't do this, but POSIX does). */ - if (word == IFS_WORD || - (!ctype(c, C_IFSWS) && c && word == IFS_NWS)) { - char *p; - + if ((word == IFS_WORD) || (word == IFS_QUOTE) || (c && + (word == IFS_IWS || word == IFS_NWS) && + !ctype(c, C_IFSWS))) { + char *p; + emit_word: *dp++ = '\0'; p = Xclose(ds, dp); #ifdef BRACE_EXPAND Index: regress/bin/ksh/ifs.t =================================================================== RCS file: /cvs/src/regress/bin/ksh/ifs.t,v retrieving revision 1.1 diff -u -p -u -r1.1 ifs.t --- regress/bin/ksh/ifs.t 2 Dec 2013 20:39:44 -0000 1.1 +++ regress/bin/ksh/ifs.t 3 Mar 2016 23:08:44 -0000 @@ -45,10 +45,10 @@ stdin: showargs 3 $@ showargs 4 "$@" expected-stdout: - <1> <A B C> + <1> <A> <B> <C> <2> <ABC> - <3> <A B C> - <4> <A B C> + <3> <A> <B> <C> + <4> <A> <B> <C> --- name: IFS-space-colon-1