On 02/12/2015 23:37, Gioele Barabucci wrote:
Hello,

I am forwarding a bug [1] reported by a Debian user: `read` does not
ignore trailing spaces. The current version of dash is affected by
this bug.

A simple test from the original reporter:

     $ dash -c 'echo "  a b  " | { read v ; echo "<$v>" ; }'
     <a b  >

     $ bash -c 'echo "  a b  " | { read v ; echo "<$v>" ; }'
     <a b>

Other shells like posh and mksh behave like bash.

This is indeed a bug based on the current specification. In the past, the specification was unclear and the dash interpretation was a legitimate one, but currently it explicitly spells out that trailing IFS whitespace gets ignored.

However, unless I'm misreading the spec, mksh and bash don't seem to implement it properly either: only trailing IFS whitespace is supposed to be dropped. IFS non-whitespace is supposed to remain, even at the end of the input. mksh and bash remove it, posh and zsh leave it in:

$ for shell in bash mksh posh zsh; do printf %s: "$shell"; $shell -c 'IFS=,; echo a, | { read v; echo "<$v>"; }'; done
  bash:<a>
  mksh:<a>
  posh:<a,>
  zsh:<a,>

As far as I can tell, the posh/zsh behaviour is the correct behaviour, but I'm not convinced yet my interpretation is correct.

Attached is a not fully tested proof of concept to implement the posh/zsh behaviour in dash by extending ifsbreakup() to allow specifying a maximum number of arguments instead of fixing it up in readcmd_handle_line(). It returns <a b> in your test, and <a,> in mine. Feedback welcome.

Cheers,
Harald van Dijk

This error is reproducible with dash 0.5.7 and with the current master
git master branch, commit 2e5842258bd5b252ffdaa630db09c9a19a9717ca.

[1] https://bugs.debian.org/794965

--
Gioele Barabucci <[email protected]>
diff --git a/src/expand.c b/src/expand.c
index b2d710d..6afd562 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -203,7 +203,7 @@ expandarg(union node *arg, struct arglist *arglist, int 
flag)
         * TODO - EXP_REDIR
         */
        if (flag & EXP_FULL) {
-               ifsbreakup(p, &exparg);
+               ifsbreakup(p, 0, &exparg);
                *exparg.lastp = NULL;
                exparg.lastp = &exparg.list;
                expandmeta(exparg.list, flag);
@@ -1016,9 +1016,11 @@ recordregion(int start, int end, int nulonly)
  * Break the argument string into pieces based upon IFS and add the
  * strings to the argument list.  The regions of the string to be
  * searched for IFS characters have been stored by recordregion.
+ * If maxargs is non-zero, at most maxargs arguments will be created, by
+ * joining together the last arguments.
  */
 void
-ifsbreakup(char *string, struct arglist *arglist)
+ifsbreakup(char *string, int maxargs, struct arglist *arglist)
 {
        struct ifsregion *ifsp;
        struct strlist *sp;
@@ -1054,12 +1056,36 @@ ifsbreakup(char *string, struct arglist *arglist)
                                                start = p;
                                                continue;
                                        }
+                                       /* If only reading one more argument, 
combine any field terminators,
+                                        * except for trailing IFS whitespace. 
*/
+                                       if (maxargs == 1) {
+                                               q = p;
+                                               p++;
+                                               if (ifsspc) {
+                                                       /* Ignore IFS 
whitespace at end */
+                                                       for (;;) {
+                                                               if (p >= string 
+ ifsp->endoff) {
+                                                                       *q = 
'\0';
+                                                                       goto 
add;
+                                                               }
+                                                               if (*p == 
(char)CTLESC)
+                                                                       p++;
+                                                               ifsspc = 
strchr(ifs, *p) && strchr(defifs, *p);
+                                                               p++;
+                                                               if (!ifsspc) {
+                                                                       break;
+                                                               }
+                                                       }
+                                               }
+                                               continue;
+                                       }
                                        *q = '\0';
                                        sp = (struct strlist *)stalloc(sizeof 
*sp);
                                        sp->text = start;
                                        *arglist->lastp = sp;
                                        arglist->lastp = &sp->next;
                                        p++;
+                                       if (maxargs) maxargs--;
                                        if (!nulonly) {
                                                for (;;) {
                                                        if (p >= string + 
ifsp->endoff) {
diff --git a/src/expand.h b/src/expand.h
index 6a90f67..26dc5b4 100644
--- a/src/expand.h
+++ b/src/expand.h
@@ -69,7 +69,7 @@ char *_rmescapes(char *, int);
 int casematch(union node *, char *);
 void recordregion(int, int, int);
 void removerecordregions(int); 
-void ifsbreakup(char *, struct arglist *);
+void ifsbreakup(char *, int, struct arglist *);
 void ifsfree(void);
 
 /* From arith.y */
diff --git a/src/miscbltin.c b/src/miscbltin.c
index b596fd2..39b9c47 100644
--- a/src/miscbltin.c
+++ b/src/miscbltin.c
@@ -67,28 +67,21 @@
  *  less fields than variables -> remaining variables unset.
  *
  *  @param line complete line of input
+ *  @param ac argument count
  *  @param ap argument (variable) list
  *  @param len length of line including trailing '\0'
  */
 static void
-readcmd_handle_line(char *s, char **ap)
+readcmd_handle_line(char *s, int ac, char **ap)
 {
        struct arglist arglist;
        struct strlist *sl;
-       char *backup;
-       char *line;
 
-       /* ifsbreakup will fiddle with stack region... */
-       line = stackblock();
        s = grabstackstr(s);
 
-       /* need a copy, so that delimiters aren't lost
-        * in case there are more fields than variables */
-       backup = sstrdup(line);
-
        arglist.lastp = &arglist.list;
        
-       ifsbreakup(s, &arglist);
+       ifsbreakup(s, ac, &arglist);
        *arglist.lastp = NULL;
        ifsfree();
 
@@ -104,21 +97,6 @@ readcmd_handle_line(char *s, char **ap)
                        return;
                }
 
-               /* remaining fields present, but no variables left. */
-               if (!ap[1] && sl->next) {
-                       size_t offset;
-                       char *remainder;
-
-                       /* FIXME little bit hacky, assuming that ifsbreakup 
-                        * will not modify the length of the string */
-                       offset = sl->text - s;
-                       remainder = backup + offset;
-                       rmescapes(remainder);
-                       setvar(*ap, remainder, 0);
-
-                       return;
-               }
-               
                /* set variable to field */
                rmescapes(sl->text);
                setvar(*ap, sl->text, 0);
@@ -211,7 +189,7 @@ start:
 out:
        recordregion(startloc, p - (char *)stackblock(), 0);
        STACKSTRNUL(p);
-       readcmd_handle_line(p + 1, ap);
+       readcmd_handle_line(p + 1, argc - (ap - argv), ap);
        return status;
 }
 

Reply via email to