# Reproduction script: checks whether the number of text lines in the PDF
# produced by abiword varies with the locale abiword is run under.
#
# Writes a two-line text file (one line of CJK text followed by one empty
# line) in /tmp, converts it to PDF under each locale, and prints how many
# lines pdftotext extracts from each result.
set -e
cd /tmp

t=n.txt
pdf=${t%.txt}.pdf   # the PDF that abiword --to=pdf produces for $t

# Build the input: one text line plus one empty line, so wc reports 2.
printf '%s\n' '郵編123' > "$t"
printf '\n' >> "$t"
wc -l "$t"

for loc in C.UTF-8 zh_CN.UTF-8 zh_TW.UTF-8; do
  # Label for the count printed below; the locale name is passed as data,
  # never as the printf format string.
  printf '%s:\t' "$loc"
  LC_ALL="$loc" abiword --to=pdf "$t"
  # Text extraction always runs under the same locale, so only the
  # conversion step differs between iterations.
  LC_ALL=C.UTF-8 pdftotext -nopgbrk "$pdf" - | wc -l
  #pdffonts "$pdf"
done
I get:

2 n.txt
C.UTF-8: 2
zh_CN.UTF-8: 5
zh_TW.UTF-8: 5