1) You can't just convert the whole buffer after fread(), since it may end in the middle of a multibyte sequence at the BUFSIZ boundary. Look at how the GNU utilities do it.
OK, I hadn't thought of this aspect. What about this?

/*
 * Classify a wide character as "binary": non-zero when ch is a control
 * character that is not also whitespace, zero otherwise.
 *
 * Implemented as a function rather than a macro so that the argument is
 * evaluated exactly once; an argument with side effects (for example a
 * call that reads from a stream) is therefore safe.
 */
static inline int
iswbinary(wint_t ch)
{
       return (!iswspace(ch) && iswcntrl(ch));
}

/*
 * Decide whether the stream f looks like a binary file.
 *
 * Seeks to the start of f and reads at most BUFSIZ wide characters with
 * fgetwc(), so multibyte sequences are decoded by the stdio layer and can
 * never be split at a buffer boundary.  Returns 1 as soon as a character
 * satisfying iswbinary() is seen, 0 otherwise.  Also returns 0 when the
 * initial fseek() fails.
 *
 * On every path after a successful seek the stream is rewound before
 * returning; rewind() also clears the stream's error indicator.
 */
int
bin_file(FILE *f)
{
       wint_t   ch;
       size_t   i;
       int      ret = 0;

       if (fseek(f, 0L, SEEK_SET) == -1)
               return (0);

       /* Examine exactly BUFSIZ characters at most, matching mmbin_file(). */
       for (i = 0; i < BUFSIZ; i++) {
               /*
                * WEOF means end of file, a read error or an invalid
                * multibyte sequence; in all three cases stop scanning
                * and never hand WEOF to the classifier.
                * NOTE(review): an invalid sequence might itself be a
                * reason to report "binary" -- confirm desired policy.
                */
               if ((ch = fgetwc(f)) == WEOF)
                       break;
               if (iswbinary(ch)) {
                       ret = 1;
                       break;
               }
       }

       rewind(f);
       return (ret);
}

/*
 * Decide whether the memory-mapped file f looks like a binary file.
 *
 * Decodes at most BUFSIZ wide characters from the start of the mapping,
 * one at a time with mbrtowc(), and returns 1 as soon as one of them
 * satisfies iswbinary().  Returns 0 if none does, or if an invalid or
 * truncated multibyte sequence is met before any such character.
 *
 * The bytes are decoded with an explicit remaining-length count rather
 * than with mbstowcs(): the mapping is not guaranteed to be
 * NUL-terminated, and binary data usually contains embedded NUL bytes,
 * at which mbstowcs() would silently stop.  No temporary buffer is
 * allocated, so there is nothing to free on any path.
 *
 * Assumes f->base points at the mapped bytes and f->len is their byte
 * count -- TODO confirm against the struct mmfile definition.
 */
int
mmbin_file(struct mmfile *f)
{
       /* XXX knows too much about mmf internals */
       const char      *p = f->base;
       size_t           left = f->len;
       size_t           i, n;
       mbstate_t        mbs = { 0 };   /* initial conversion state */
       wchar_t          wc;

       for (i = 0; i < BUFSIZ && left > 0; i++) {
               n = mbrtowc(&wc, p, left, &mbs);

               /* (size_t)-1: invalid sequence, (size_t)-2: truncated one. */
               if (n == (size_t)-1 || n == (size_t)-2)
                       return (0);

               /*
                * mbrtowc() returns 0 for a NUL character and stores L'\0'
                * in wc; step over the one byte it occupies so the scan
                * can continue if the classifier does not flag it.
                */
               if (n == 0)
                       n = 1;

               if (iswbinary(wc))
                       return (1);

               p += n;
               left -= n;
       }
       return (0);
}

This should be ok, right?

2) Better use iswspace and iswcntrl instead of iswctype.
OK, changed, thanks. I had been looking for such functions myself, but the wctype man page doesn't mention them.

3) util.c needs to be fixed in several places too.
Yes, I know, I'm just advancing step by step. The next item will be to fix that word boundary handling.

Regards,
Gabor
_______________________________________________
freebsd-hackers@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-hackers
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to