Hello all
I Have problem about , How i can compute accuracy to unigram,bigram and trigram
and how i can change the size to iteration separate from 1 to 10 in each stage 
from iteration train take 90% and training 10%.
thank you to read my message
import codecs
import nltk
from nltk import*

outfile = codecs.open('unigram_tagged_sents_out.txt','w','utf-8')
outfile2 = codecs.open('bigram_tagged_sents_out.txt','w','utf-8')
outfile3 = codecs.open('trigram_tagged_sents_out.txt','w','utf-8')
File1=codecs.open('C:\project\Corpus_word.txt','r','utf_8').readlines()
word_pos_list = []
tokens=[]
train_sents=[]
test_tagged_sents=[]
all_test_sents = []
n=10


for line in File1:
        tokens = line.split('\t')
        #print '%s\t%s\t%s' % (tokens[0], tokens[1], tokens[2])
        word_pos_list.append((tokens[0], tokens[1]))
        all_test_sents.append(tokens[0])

for t in range(10):      
        size=int(len(word_pos_list)*(0.9))
#print size
        train_sents.append(word_pos_list[:size])
        test_tagged_sents.append(word_pos_list[size:])
        test_sents=all_test_sents[size:]

        print "unigram tagger"
#Unigram tagger

        unigram_tagger = nltk.UnigramTagger(train_sents)
        tagged_unigram_sents = unigram_tagger.tag(test_sents)
        print unigram_tagger.evaluate(test_tagged_sents)
        for (word, tag) in tagged_unigram_sents:
             print>>outfile, '%s\t%s' % (word, tag)
        print nltk.accuracy(tagged_unigram_sents,test_sents)     


#bigram tagger
        print "Bigram Tagger"
        bigram_tagger = nltk.BigramTagger(train_sents,backoff= unigram_tagger)
        tagged_bigram_sents=bigram_tagger.tag(test_sents)
        print bigram_tagger.evaluate(test_tagged_sents)
        for (word, tag) in tagged_bigram_sents:
             print>>outfile2, '%s\t%s' % (word, tag)
       

#Trigram tagger
        print "Trigram Tagger"

        trigram_tagger=nltk.TrigramTagger(train_sents,backoff= bigram_tagger)
        tagged_trigram_sents=trigram_tagger.tag(test_sents)
        print trigram_tagger.evaluate(test_tagged_sents)
        for (word, tag) in tagged_trigram_sents:
            print>>outfile3, '%s\t%s' % (word, tag)
outfile.close()
outfile2.close()
outfile3.close()

print 'Done!'
#accuracy = unigram_tagger.evaluate(tagged_test_sents)
#print 'accuracy = ', accuracy
#train_sents.append((word_pos_list[:size]))
#print train_sents
#test_sents.append(word_pos_list[size:])
#print test_sents
#bigram_tagger=nltk.BigramTagger(train_sents)
#print bigram_tagger.tag(tokens[:size])
#print bigram_tagger._train(train_sents,cutoff=size)

#print bigram_tagger.evaluate(test_sents)

         
-- 
https://mail.python.org/mailman/listinfo/python-list

Reply via email to