# Reproduction script: convert the same two-line text file to PDF with
# abiword under several locales, then print how many lines pdftotext
# extracts from each resulting PDF.
#
# Output: first `wc -l` of the input file, then one "<locale>:<TAB><count>"
# line per locale.
# Requires: abiword and pdftotext on PATH.
set -e
cd /tmp

t=n.txt
# The PDF is read back under the input's base name with a .pdf suffix;
# derive it from $t so the two names cannot drift apart.
pdf=${t%.txt}.pdf

# Input: one line of mixed CJK/ASCII text followed by one empty line.
printf '%s\n\n' '郵編123' > "$t"
wc -l "$t"

for loc in C.UTF-8 zh_CN.UTF-8 zh_TW.UTF-8; do
    # Keep the locale name out of the printf format string so that it is
    # always printed literally.
    printf '%s:\t' "$loc"
    # Only the conversion step runs under the locale being tested ...
    LC_ALL=$loc abiword --to=pdf "$t"
    # ... while text extraction always uses the same fixed locale.
    LC_ALL=C.UTF-8 pdftotext -nopgbrk "$pdf" - | wc -l
    #pdffonts "$pdf"
done
Running the script above, I get the following output:
2 n.txt
C.UTF-8:        2
zh_CN.UTF-8:    5
zh_TW.UTF-8:    5

Reply via email to