> Girish Sahani wrote: >> Hi, >> >> There is a code in my main function which is something like: >> >> while prunedFinal != []: >> prunedNew = genColocations(prunedK) *** >> tableInstancesNew = >> genTableInstances(prunedNew,tableInstancesK) >> tiCountDict = tiCount(tableInstancesNew) >> tiDict = findPI(tableInstancesNew) >> prunedFinal = pruneTI(tiDict,pi) >> rulesDict = genRules(prunedFinal) >> cpDict = findCP(rulesDict) >> prunedRulesList = pruneCP(cpDict,cp) >> prunedK = prunedFinal >> tableInstancesK = tableInstancesNew >> else: >> return prunedRulesList >> >> prunedK and tableInstancesK are defined in the main function. > > defined as what ? functions, strings, lists, classes, ... ? PrunedK is a list that contains 2 length strings and tableInstancesK is a dictionary,its keys are 2 length strings and values are lists of lists > >> Before the >> main function, i have defined the other functions such as >> genColocations,genTableInstances,etc. Output of genColocations is to be >> given to the next function genTableInstances,output of this function to >> tiCount and findPI, and so on. >> However i am getting an error at the line marked with ***. > > Which error ? How do you hope us to be of any help here if you don't *at > least* provide the full traceback ? FWIW, the canonical way to do things > is to: > - provide minimal *runnable* code exposing the problem > - explain what you hoped to get > - explain what you got instead (including full traceback) > > As a matter of fact, it's often the case that one solves the problem > when working on the first point !-) > > (snip) Ohh...I was thinking that posting the whole code would not be a good idea. The error i get above is: line 266, in colocationMiner prunedNew = genColocations(prunedK)
Anyways, i've attached the file colocations.py. The expected output is a List of rules (prunedRulesList).These rules are themselves lists.e.g ['ab','c'] denotes the rule ab=>c. Please do have a look if you have time :). > > -- > bruno desthuilliers > python -c "print '@'.join(['.'.join([w[::-1] for w in p.split('.')]) for > p in '[EMAIL PROTECTED]'.split('@')])" > -- > http://mail.python.org/mailman/listinfo/python-list >
#convert the input file into a dictionary (global ids mapped to feature type) and a list of global id pairs def get_colocations(filename): lines = open(filename).read().split("\n") colocnDict = {} for line in lines: n1, b1, n2, b2 = line.split(",") n1 = int(n1) n2 = int(n2) a1 = b1.strip("'") a2 = b2.strip("'") colocnDict[n1] = a1 colocnDict[n2] = a2 return colocnDict #get pairs of feature ids which are colocated def getPairs(filename): lines = open(filename).read().split("\n") pairList = [] for line in lines: n1, b1, n2, b2 = line.split(",") pairList.append([n1, n2]) return pairList #count number of occurences of each feature in the feature list and store in featueCountDict def getFeatureCount(): colocnDict = get_colocations("colocations.txt") featureList= colocnDict.values() featureListUnique = [] [featureListUnique.append(word) for word in featureList if not featureListUnique.count(word)] featureCountDict = {} for feature in featureListUnique: featureCountDict[feature] = featureList.count(feature) return featureCountDict def k2k1(string1, string2): for c in string1: string2 = string2.replace(c,"",1) if len(string2) == 1: string1 += string2 else: pass return string1 def dictInvert(d): dictInv = {} for k, v in d.iteritems(): keys = dictInv.setdefault(v, []) keys.append(k) return dictInv #Generate candidate co-locations of size k+1 from size k #get lower level subsets and prune it by antimonotone property def genColocations(prunedK): prunedNew = substringList = [] for string1 in prunedK: for string2 in prunedK: k = len(string2) if string1 != string2: string1 = k2k1(string1, string2) if len(string1) == k+1: prunedNew.append(string1) for s in prunedNew: substrings = [s[:i]+s[i+1:] for i in range(len(s))] for string in substrings: if string not in prunedK: prunedNew.remove(s) continue continue #tableInstancesNew is a dictionary with keys as k level colocations and values as table instances def genTableInstances(prunedNew,tableInstancesK): colocnDict = get_colocations("colocations.txt") tableInstancesNew = {} for s in p: substring1 = s[:len(s)-1] substring2 = s[:len(s)-2]+s[len(s)-1:] #get 2 substrings list1 = tableInstancesK(substring1) list2 = tableInstancesK(substring2) stringList1 = list(substring1) stringList2 = list(substring2) inter1 = filter(lambda x:x in stringList1,stringList2) indexList1 = indexList2 = [] i=j=0 for char in inter1: #get positions of char. of inter1 in string1/2, then corr. positions in list1/2 i = substring1.index(char) j = substring2.index(char) indexList1.append(i) indexList2.append(j) continue char1 = filter(lambda x:x not in inter1,stringList1) char2 = filter(lambda x:x not in inter1,stringList2) charList = [] charList.append(char1[0]) charList.append(char2[0]) pairList = getPairs("colocations.txt") for ti1 in list1: for ti2 in list2: for i in range(0,len(indexList1)-1): index1 = indexList1(i) index2 = indexList2(j) if j < len(indexList2)-1 & ti1[index1] == ti2[index2]: i+=1 j+=1 continue elif i == len(indexList2)-1 & j == len(indexList2)-1 & ti1[index1] == ti2[index2]: if charList in pairList: k = string2.index(char2[0]) instance = ti2(k) tiNew = ti1.append(instance) tiNewList = [] tiNewList.append(tiNew) else: break else: break tableInstancesNew[s] = tiNewList continue return tableInstancesNew #Storing tableInstances and their corresponding counts def tiCount(tableInstancesNew): for k,v in tableInstancesNew.iteritems(): tiCountdict[k] = len(v) return tiCountDict #finding participation index of a table instance #pr=count(no. of distinct instances present in the colocation) /count(feature) def findPI(tableInstancesNew): pr = prList = [] i=j=k=0 colocnDict = get_colocations("colocations.txt") for tableInstance in tableInstancesNew.values(): length = len(tableInstance(0))-1 for i in range(0,length,1): for rowInstance in tableInstance: prList.append(rowInstance[i:i+1]) continue prListUnique = [] [prListUnique.append(word) for word in prList if not prListUnique.count(word)] featureCountDict = getFeatureCount() if rowInstance[i] in featureCountDict.keys(): featureCount = featureCountDict(rowInstance[i]) pr[j] = round(len(prListUnique)*1.0 / featureCount,2) prList = [] j+=1 i+=1 continue pi[k] = min(pr) k+=1 continue tiDict = {} tiDict = dict(zip(tableInstancesNew.keys(), pr)) #check order return tiDict #Pruning based on participation index def pruneTI(tiDict,pi): for ti in tiDict: if tiDict[ti] >= pi: prunedDict[ti] = tiDict[ti] prunedFinal = prunedDict.keys() return prunedFinal #generates rules list of a colocation. e.g.'abc' gives ['ab','c'] which denotes the rule ab => c def genRulesList(colocation): rulesList= [ ] length = len(colocation) i=0 colocnList = [] colocnList.append(colocation) while colocNew != colocation: colocNew = colocation + colocation[0] colocNew = colocNew[1:len(colocNew)] colocnList.append(colocNew) continue for colocn in colocnList: for j in range(0,length - 1,1): rule1 = [colocn[i:i+j],colocn[i+j:length]] rule2 = [colocn[i+j:length],colocn[i:i+j]] rulesList.append(rule1) rulesList.append(rule2) continue continue return rulesList #generate the colocation rules,rulesDict maps each colocation to its rules list def genRules(prunedFinal): rulesDict = {} for colocation in prunedFinal: rulesDict[colocation] = genRulesList(colocation) return rulesDict #finds CP of a rule list and maps each rule to its cp def findCP(rulesDict): cpDict = {} tiCountDict = tiCount(tableInstancesNew) for rulesList in rulesDict.values(): for rule in rulesList: string = " ".join(rule) list1 = tiCountDict[string] count1 = len(list1) colocn = rule[0] list2 = tiCountDict[colocn] count2 = len(list2) cpDict[rule] = round(count1*1.0 / count2,2) return cpDict #prunes the rules on basis of cp def pruneCP(cpDict,cp): for rule in rulesDict: if rulesDict[subset] >= cp: prunedRulesDict[ti] = rulesDict[ti] prunedRulesFinal = prunedRulesDict.keys() prunedRulesList = [] prunedRulesList.append(prunedRulesFinal) return prunedRulesList #generate 2-sized tableInstancesK and prunedK = prunedFinal from colocnDict def colocationMiner(pi,cp): colocationSet = {} featureCountDict = getFeatureCount() tiCountDict = featureCountDict featureList = featureCountDict.keys() prunedK = [] for i in range(0,len(featureList) - 1,1): for k in range(1,len(featureList),1): if i+k <= len(featureList)-1: colocn = featureList[i] + featureList[i+k] prunedK.append(colocn) prunedFinal = prunedK colocnDict = get_colocations("colocations.txt") tiDict1 = dictInvert(colocnDict) tiDict2 = {} for k1,v1 in tiDict1.iteritems(): for k2,v2 in tiDict1.iteritems(): if k1 != k2 and not tiDict2.has_key(k2 + k1): tiDict2[k1 + k2] = [v1] + [v2] l = tiDict2.values() for value in l: l1 = value[0] l2 = value[1] l3 = l4 = l5 = [] for i in range(0,len(l1),1): for j in range(0,len(l2),1): l3.append([l1[i],l2[j]]) l4.append(l3) pairList = getPairs("colocations.txt") for element in l4: for pair in element: if pair not in pairList: element.remove(pair) l5.append(element) tableInstancesK = dict(zip(tiDict2.keys(), l5)) #check order for k,v in tableInstancesK.iteritems(): tiCountDict[k] = len(v) while prunedFinal != []: prunedNew = genColocations(prunedK) tableInstancesNew = genTableInstances(prunedNew,tableInstancesK) tiCountDict = tiCount(tableInstancesNew) tiDict = findPI(tableInstancesNew) prunedFinal = pruneTI(tiDict,pi) rulesDict = genRules(prunedFinal) cpDict = findCP(rulesDict) prunedRulesList = pruneCP(cpDict,cp) prunedK = prunedFinal tableInstancesK = tableInstancesNew else: return prunedRulesList print "List of colocation rules is ",prunedRulesList print tiCountDict
-- http://mail.python.org/mailman/listinfo/python-list