Hello.

I am writing a program that reads in a text file, extracts each of the
words and replaces a different document with the words. It works great
until it encounter a non-english letter.

I have tried the following:

self.f = codecs.open(ordliste, 'r', 'utf-8')
where I open the first file.

And
en = unicode(en)
en = en.encode('utf-8')

as well as
en = en.decode('iso-8859-1')

where
each word is entered from the document.

But, still, I get this error:
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe5 in position 17:
ordinal not in range(128)

As well as this:
UnicodeDecodeError: 'utf8' codec can't decode bytes in position 168-170:
invalid data
if I skips the second part.

What is wrong? How can I fix this? I am using ActiveState Python 2.3 and
WinXp.

Thanks in advance...


This is the whole source:

from win32com.client import Dispatch
import time
import codecs

class oversett:
    def __init__(self, ordliste, dokument):
        objWord = Dispatch("Word.Application")
        self.f = codecs.open(ordliste, 'r', 'utf-8')
        #self.f = open(ordliste)
        objDoc =  objWord.Documents.Open(dokument)
        self.objSelection = objWord.Selection

    def kjor(self):
        s = time.clock()
        wdReplaceAll = 2
        wdFindContinue = 1
        t = 1
        for i in self.f.readlines():
            en = i.split('\t')[0]
            #en = str(en).decode('iso-8859-1')
            #en = en.decode('iso-8859-1')
            en = unicode(en)
            en = en.encode('utf-8')
            print en
            to = i.split('\t')[1]
            #to = str(to).decode('iso-8859-1')
            #to = to.decode('iso-8859-1')
            to = unicode(to)
            to = to.encode('utf-8')
            t = t + 1
            if t % 1000 == 0:
                print t
            try:
                self.objSelection.Find.Execute(en, False, True, False,
False, True, True, wdFindContinue, True, to, wdReplaceAll,
False, False, False, False)
            except UnicodeEncodeError:
                print 'pokker'
            except:
                pass

        print time.clock() - s

if __name__ == '__main__':
    n = oversett('c:/ordliste.txt','c:/foo.doc')
    n.kjor()


-- 
This email has been scanned for viruses & spam by Decna as - www.decna.no
Denne e-posten er sjekket for virus & spam av Decna as - www.decna.no

_______________________________________________
Tutor maillist  -  Tutor@python.org
http://mail.python.org/mailman/listinfo/tutor

Reply via email to