Hi, this is a follow-up to this.
The file names can be fixed by the attached script. Can you include it in the examples? Also, I have a few scripts that I used to fix encoding issues. Osamu
#!/bin/sh
# pst_fix_filename
#   Script to fix file name encoding: renames every entry of the current
#   directory whose name is stored in a legacy Windows code page so that the
#   name becomes UTF-8.
#   by osamu <at> debian dot org
#
# Usage: pst_fix_filename [FROM_ENCODING]
#   FROM_ENCODING  any encoding name accepted by "iconv -f"
#                  (default: shift-jis)
#
# Encodings for pst files created on Windows 95/98/Me:
#   shift-jis    Japanese (tested)
#   gbk          Chinese, GB18030 (GBK code points)   (osamu's guess)
#   iso-8859-1   Western European                     (osamu's guess)
#   Probably BIG5 for Taiwan, UHC for Korea, ISO-8859-2 for Polish ...
#
# Exit status: 0 when every name was converted or already correct,
#              1 when at least one entry had to be skipped.

# pst_fix_filename [FROM_ENCODING]
#   Rename the entries of the current directory from FROM_ENCODING to UTF-8.
#   Variables are prefixed with pff_ because POSIX sh has no "local".
pst_fix_filename() {
  pff_from=${1:-shift-jis}
  pff_status=0

  for pff_old in *; do
    # An unmatched glob is left as a literal "*": skip what does not exist.
    [ -e "$pff_old" ] || [ -L "$pff_old" ] || continue

    # printf instead of echo: names may start with "-" or contain "\".
    # The pipeline status is the one of iconv, so an invalid byte sequence
    # is detected here instead of producing a truncated name.
    if ! pff_new=$(printf '%s' "$pff_old" | iconv -f "$pff_from" -t utf-8); then
      printf 'pst_fix_filename: cannot convert name, skipped: %s\n' \
        "$pff_old" >&2
      pff_status=1
      continue
    fi

    # Nothing to do for names that are unchanged (e.g. plain ASCII); this
    # also avoids the "are the same file" error from mv.
    if [ -z "$pff_new" ] || [ "$pff_new" = "$pff_old" ]; then
      continue
    fi

    # Never overwrite an existing entry with the converted name.
    if [ -e "$pff_new" ] || [ -L "$pff_new" ]; then
      printf 'pst_fix_filename: target exists, skipped: %s\n' "$pff_new" >&2
      pff_status=1
      continue
    fi

    # Both names are quoted so that spaces and glob characters survive.
    mv -- "$pff_old" "$pff_new" || pff_status=1
  done

  return "$pff_status"
}

pst_fix_filename "$@"
# Convert the content of every mailbox file in the current directory.
#
# I use nkf here but iconv can be used too. I make a UTF-8 and an
# iso-2022-jp version of each mailbox:
#   FILE.utf8x.mbox   UTF-8
#   FILE.2022.mbox    ISO-2022-JP
#
# I found Content-Type is needed for UTF-8 when importing to the MacBook mail
# program. ISO-2022-JP seems to be quite robust and does not need
# Content-Type (I did it anyway).
#
# For mutt, UTF-8 works better but the title, which was OK initially in the
# pst, needs to be fixed. Need some idea but not needed now.
# (Procmail body filtering?)

# pst_set_charset CHARSET
#   Filter stdin to stdout, replacing every line that contains a text/plain
#   or text/html Content-Type with one declaring CHARSET.
#   CHARSET is inserted into the sed script as is, so it must not contain
#   "|", "&" or "\"; only fixed values are passed below.
pst_set_charset() {
  sed -e "s|[Cc]ontent-[Tt]ype:.*text/plain.*\$|Content-Type: text/plain; charset=$1|" \
      -e "s|[Cc]ontent-[Tt]ype:.*text/html.*\$|Content-Type: text/html; charset=$1|"
}

# pst_convert_content
#   For each regular file FILE in the current directory write
#   FILE.utf8x.mbox and FILE.2022.mbox. Returns 1 when nkf is missing or an
#   output file could not be written.
#   Variables are prefixed with pcc_ because POSIX sh has no "local".
pst_convert_content() {
  # Without nkf every output file would silently be created empty.
  if ! command -v nkf >/dev/null 2>&1; then
    printf 'pst_convert_content: nkf not found\n' >&2
    return 1
  fi

  pcc_status=0
  for pcc_box in *; do
    # Skip directories and the literal "*" of an empty directory.
    [ -f "$pcc_box" ] || continue

    # Skip the results of a previous run: converting them again as
    # shift-jis input would only produce garbage.
    case $pcc_box in
      *.utf8x.mbox | *.2022.mbox) continue ;;
    esac

    printf 'convert %s\n' "$pcc_box"

    # -S for shift-jis input
    # -w for UTF-8 output
    nkf -S -w <"$pcc_box" |
      pst_set_charset UTF-8 >"$pcc_box.utf8x.mbox" || pcc_status=1

    # -S for shift-jis input
    # -j for ISO-2022-JP output
    nkf -S -j <"$pcc_box" |
      pst_set_charset iso-2022-jp >"$pcc_box.2022.mbox" || pcc_status=1

    # This is an example snippet for a Western European Windows pst
    # (example of using "iconv"):
    #
    #   iconv -f ISO-8859-1 -t UTF-8 "$pcc_box" | ....
  done

  return "$pcc_status"
}

pst_convert_content "$@"