Hello, if iso8859-x characters are found in certain positions of the input, ksh parsing may get confused.
The problem is that the parser frequently reads a byte with fcmbget() to "peek" at the next input character, and then calls fcseek(-LEN), where LEN is the number of bytes read, to reset the input. However, _fcmbget() has a local static buffer to compose multibyte characters, and fcseek() does not know about it. The logic is far more complex than just making the "compose buffer" in _fcmbget() static at file scope (rather than function scope), making fcseek() a function, etc. The attached RFC patch works for the case where the problem was detected, as well as for UTF-8 or Latin characters. To reproduce the issue, please make sure the file test.sh has iso8859-1 characters (attaching or downloading may convert them to UTF-8), then run: $ bash test.sh > iso.sh and then: $ ksh -x iso.sh 2>&1 | tail -5 (You need to generate the iso.sh script with bash, or use a ksh with the proposed patch; otherwise, generating the script will also trigger the problem.) Thanks, Paulo
diff -up src/cmd/ksh93/sh/fcin.c.orig src/cmd/ksh93/sh/fcin.c
--- src/cmd/ksh93/sh/fcin.c.orig 2017-01-16 16:50:04.357784499 -0200
+++ src/cmd/ksh93/sh/fcin.c 2017-01-16 16:51:22.294508745 -0200
@@ -150,63 +150,19 @@ extern void fcrestore(Fcin_t *fp)
_Fcin = *fp;
}
-/* for testing purposes with small buffers */
-#if defined(IOBSIZE) && (IOBSIZE < 2*MB_LEN_MAX)
-# undef MB_LEN_MAX
-# define MB_LEN_MAX (IOBSIZE/2)
-#endif
-
-struct Extra
-{
- unsigned char buff[2*MB_LEN_MAX];
- unsigned char *next;
-};
-
int _fcmbget(short *len)
{
- static struct Extra extra;
- register int i, c, n;
- if(_Fcin.fcleft)
- {
- if((c = mbsize(extra.next)) < 0)
- c = 1;
- if((_Fcin.fcleft -= c) <=0)
- {
- _Fcin.fcptr = (unsigned char*)fcfirst() - _Fcin.fcleft;
- _Fcin.fcleft = 0;
- }
- *len = c;
- if(c==1)
- c = *extra.next++;
- else if(c==0)
- _Fcin.fcleft = 0;
- else
- c = mbchar(extra.next);
- return(c);
- }
- switch(*len = mbsize(_Fcin.fcptr))
+ register int c;
+ switch (*len = mbsize(_Fcin.fcptr))
{
- case -1:
- if(_Fcin._fcfile && (n=(_Fcin.fclast-_Fcin.fcptr)) < MB_LEN_MAX)
- {
- memcpy(extra.buff, _Fcin.fcptr, n);
- _Fcin.fcptr = _Fcin.fclast;
- for(i=n; i < MB_LEN_MAX+n; i++)
- {
- if((extra.buff[i] = fcgetc(c))==0)
- break;
- }
- _Fcin.fcleft = n;
- extra.next = extra.buff;
- return(fcmbget(len));
- }
+ case -1:
*len = 1;
/* fall through */
- case 0:
- case 1:
+ case 0:
+ case 1:
c=fcget();
break;
- default:
+ default:
c = mbchar(_Fcin.fcptr);
}
return(c);
test.sh
Description: Bourne shell script
_______________________________________________ ast-users mailing list [email protected] http://lists.research.att.com/mailman/listinfo/ast-users
