Hello,

  If iso8859-x characters are found in certain positions of input,
ksh parsing may get confused.

  The problem is that the parser frequently reads a byte with
fcmbget() to "peek" at the next input character, and then calls
fcseek(-LEN), where LEN is the number of bytes read, to reset
the input.

  But, _fcmbget() has a local static buffer to compose multibyte
characters, and fcseek() does not know about it.

  A proper fix is far more complex than just making the
"compose buffer" in _fcmbget() static at file scope (rather than
function scope), turning fcseek() into a function, etc.

  The attached RFC patch works for the case where the problem
was detected, as well as for UTF-8 and Latin characters.

  To reproduce the issue, please make sure the file test.sh has
iso8859-1 characters (attaching or downloading may convert them
to utf8), then run:

$ bash test.sh > iso.sh

and then:

$ ksh -x iso.sh 2>&1 | tail -5

(You need to generate the iso.sh script with bash, or use a ksh with
the proposed patch; otherwise generating it will also trigger the problem.)

Thanks,
Paulo
diff -up src/cmd/ksh93/sh/fcin.c.orig src/cmd/ksh93/sh/fcin.c
--- src/cmd/ksh93/sh/fcin.c.orig	2017-01-16 16:50:04.357784499 -0200
+++ src/cmd/ksh93/sh/fcin.c	2017-01-16 16:51:22.294508745 -0200
@@ -150,63 +150,19 @@ extern void fcrestore(Fcin_t *fp)
 	_Fcin = *fp;
 }
 
-/* for testing purposes with small buffers */
-#if defined(IOBSIZE) && (IOBSIZE < 2*MB_LEN_MAX)
-#   undef MB_LEN_MAX
-#   define MB_LEN_MAX	(IOBSIZE/2)
-#endif
-
-struct Extra
-{
-	unsigned char	buff[2*MB_LEN_MAX];
-	unsigned char	*next;
-};
-
 int _fcmbget(short *len)
 {
-	static struct Extra	extra;
-	register int		i, c, n;
-	if(_Fcin.fcleft)
-	{
-		if((c = mbsize(extra.next)) < 0)
-			c = 1;
-		if((_Fcin.fcleft -= c) <=0)
-		{
-			_Fcin.fcptr = (unsigned char*)fcfirst() - _Fcin.fcleft; 
-			_Fcin.fcleft = 0;
-		}
-		*len = c;
-		if(c==1)
-			c = *extra.next++;
-		else if(c==0)
-			_Fcin.fcleft = 0;
-		else
-			c = mbchar(extra.next);
-		return(c);
-	}
-	switch(*len = mbsize(_Fcin.fcptr))
+	register int		c;
+	switch (*len = mbsize(_Fcin.fcptr))
 	{
-	    case -1:
-		if(_Fcin._fcfile && (n=(_Fcin.fclast-_Fcin.fcptr)) < MB_LEN_MAX)
-		{
-			memcpy(extra.buff, _Fcin.fcptr, n);
-			_Fcin.fcptr = _Fcin.fclast;
-			for(i=n; i < MB_LEN_MAX+n; i++)
-			{
-				if((extra.buff[i] = fcgetc(c))==0)
-					break;
-			}
-			_Fcin.fcleft = n;
-			extra.next = extra.buff;
-			return(fcmbget(len));
-		}
+	case -1:
 		*len = 1;
 		/* fall through */
-	    case 0:
-	    case 1:
+	case 0:
+	case 1:
 		c=fcget();
 		break;
-	    default:
+	default:
 		c = mbchar(_Fcin.fcptr);
 	}
 	return(c);

Attachment: test.sh
Description: Bourne shell script

_______________________________________________
ast-users mailing list
[email protected]
http://lists.research.att.com/mailman/listinfo/ast-users

Reply via email to