Dear Group, I am trying to write code that pulls data from a MySQL backend, annotates each word, and prints the results as separate sentences, one per line. The code generally runs fine, but I feel the final step that outputs the sentences could be better. It is also fine for small data sets, but with 50,000 news articles it is extremely slow. I am using Python 2.7.11 on Windows 7 with 8 GB RAM.
# I am trying to copy the code here, for your kind review.
import MySQLdb
import nltk


def _load_tag_lookup(path):
    """Build a token -> following-token lookup from the dictionary file.

    The file is read whole and split on whitespace.  Every token is mapped
    to the token that comes right after its *first* occurrence, which is the
    value the original ``a4[a4.index(word) + 1]`` expression produced.
    Presumably the file alternates ``word tag word tag ...`` -- confirm
    against the real NewTotalTag.txt.
    """
    # The with-block closes the file; the original left the handle open.
    with open(path, "r") as dict_file:
        tokens = dict_file.read().split()

    lookup = {}
    for token, following in zip(tokens, tokens[1:]):
        # setdefault keeps the first occurrence, matching list.index().
        lookup.setdefault(token, following)
    # The final token has nothing after it, so it is left out unless it also
    # appeared earlier.  The original raised IndexError when such a word
    # showed up in the text; it is now tagged "NA" like any unknown word.
    return lookup


def _tag_rows(rows, lookup):
    """Yield ``word/TAG`` for every word found in column 3 of each row.

    Column 3 is presumably the article text -- verify against the
    ``newsinput`` table schema.  Unknown words get the tag ``NA``.
    """
    for row in rows:
        text = row[3]
        if not text:
            # A NULL/empty column has no words; the original crashed on NULL.
            continue
        # Splitting on "." first and then on whitespace is kept from the
        # original: it strips periods and breaks tokens such as "U.S." apart.
        for sentence in text.split("."):
            for word in sentence.split():
                # One dict lookup replaces the two full scans of the token
                # list ("in" + ".index") that made large inputs so slow.
                yield word + "/" + lookup.get(word, "NA")


def _chunks(items, size):
    """Yield consecutive lists of ``size`` items; the last may be shorter."""
    chunk = []
    for item in items:
        chunk.append(item)
        if len(chunk) == size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk


def sql_connect_NewTest1():
    """Tag the words of the first 50 ``newsinput`` rows and print them.

    Connects to the local ``abcd_efgh`` MySQL database, reads the tag
    dictionary from /python27/NewTotalTag.txt, annotates every word as
    ``word/TAG`` (``word/NA`` when the word is not in the dictionary) and
    prints the annotated words in groups of seven, one group per line.
    Each group is printed as a Python list, exactly as before.

    Returns None.  Errors from the database driver or from opening the
    dictionary file propagate to the caller; the cursor and connection are
    closed either way.
    """
    db = MySQLdb.connect(
        host="localhost", user="*****", passwd="*****", db="abcd_efgh"
    )
    try:
        cur = db.cursor()
        try:
            # NOTE: the author reported a runtime error with
            # "limit 0,50000"; the cause is not visible from this code.
            cur.execute("SELECT * FROM newsinput limit 0,50;")
            lookup = _load_tag_lookup("/python27/NewTotalTag.txt")
            tagged = _tag_rows(cur.fetchall(), lookup)
            # Groups of 7 words stand in for sentences here; they do not
            # follow the real sentence boundaries of the articles.
            # NOTE(review): print(" ".join(chunk)) would give plain text
            # lines instead of list reprs -- left unchanged to keep output.
            for chunk in _chunks(tagged, 7):
                print(chunk)
        finally:
            cur.close()
    finally:
        db.close()


# Thanks in Advance.
# --
# https://mail.python.org/mailman/listinfo/python-list