Hello all I Have problem about , How i can compute accuracy to unigram,bigram and trigram and how i can change the size to iteration separate from 1 to 10 in each stage from iteration train take 90% and training 10%. thank you to read my message
import codecs import nltk from nltk import*
outfile = codecs.open('unigram_tagged_sents_out.txt','w','utf-8') outfile2 = codecs.open('bigram_tagged_sents_out.txt','w','utf-8') outfile3 = codecs.open('trigram_tagged_sents_out.txt','w','utf-8') File1=codecs.open('C:\project\Corpus_word.txt','r','utf_8').readlines() word_pos_list = [] tokens=[] train_sents=[] test_tagged_sents=[] all_test_sents = [] n=10 for line in File1: tokens = line.split('\t') #print '%s\t%s\t%s' % (tokens[0], tokens[1], tokens[2]) word_pos_list.append((tokens[0], tokens[1])) all_test_sents.append(tokens[0]) for t in range(10): size=int(len(word_pos_list)*(0.9)) #print size train_sents.append(word_pos_list[:size]) test_tagged_sents.append(word_pos_list[size:]) test_sents=all_test_sents[size:] print "unigram tagger" #Unigram tagger unigram_tagger = nltk.UnigramTagger(train_sents) tagged_unigram_sents = unigram_tagger.tag(test_sents) print unigram_tagger.evaluate(test_tagged_sents) for (word, tag) in tagged_unigram_sents: print>>outfile, '%s\t%s' % (word, tag) print nltk.accuracy(tagged_unigram_sents,test_sents) #bigram tagger print "Bigram Tagger" bigram_tagger = nltk.BigramTagger(train_sents,backoff= unigram_tagger) tagged_bigram_sents=bigram_tagger.tag(test_sents) print bigram_tagger.evaluate(test_tagged_sents) for (word, tag) in tagged_bigram_sents: print>>outfile2, '%s\t%s' % (word, tag) #Trigram tagger print "Trigram Tagger" trigram_tagger=nltk.TrigramTagger(train_sents,backoff= bigram_tagger) tagged_trigram_sents=trigram_tagger.tag(test_sents) print trigram_tagger.evaluate(test_tagged_sents) for (word, tag) in tagged_trigram_sents: print>>outfile3, '%s\t%s' % (word, tag) outfile.close() outfile2.close() outfile3.close() print 'Done!' #accuracy = unigram_tagger.evaluate(tagged_test_sents) #print 'accuracy = ', accuracy #train_sents.append((word_pos_list[:size])) #print train_sents #test_sents.append(word_pos_list[size:]) #print test_sents #bigram_tagger=nltk.BigramTagger(train_sents) #print bigram_tagger.tag(tokens[:size]) #print bigram_tagger._train(train_sents,cutoff=size) #print bigram_tagger.evaluate(test_sents)
-- https://mail.python.org/mailman/listinfo/python-list