> Girish Sahani wrote:
>> Hi,
>>
>> There is a code in my main function which is something like:
>>
>>         while prunedFinal != []:
>>             prunedNew = genColocations(prunedK) ***
>>             tableInstancesNew =
>> genTableInstances(prunedNew,tableInstancesK)
>>             tiCountDict = tiCount(tableInstancesNew)
>>             tiDict = findPI(tableInstancesNew)
>>             prunedFinal = pruneTI(tiDict,pi)
>>             rulesDict = genRules(prunedFinal)
>>             cpDict = findCP(rulesDict)
>>             prunedRulesList = pruneCP(cpDict,cp)
>>             prunedK = prunedFinal
>>             tableInstancesK = tableInstancesNew
>>         else:
>>             return prunedRulesList
>>
>> prunedK and tableInstancesK are defined in the main function.
>
> defined as what ? functions, strings, lists, classes, ... ?
PrunedK is a list that contains 2 length strings and tableInstancesK is a
dictionary,its keys are 2 length strings and values are lists of lists
>
>> Before the
>> main function, i have defined the other functions such as
>> genColocations,genTableInstances,etc. Output of genColocations is to be
>> given to the next function genTableInstances,output of this function to
>> tiCount and findPI, and so on.
>> However i am getting an error at the line marked with ***.
>
> Which error ? How do you hope us to be of any help here if you don't *at
> least* provide the full traceback ? FWIW, the canonical way to do things
> is to:
> - provide minimal *runnable* code exposing the problem
> - explain what you hoped to get
> - explain what you got instead (including full traceback)
>
> As a matter of fact, it's often the case that one solves the problem
> when working on the first point !-)
>
> (snip)
Ohh...I was thinking that posting the whole code would not be a good idea.
The error i get above is:
line 266, in colocationMiner
prunedNew = genColocations(prunedK)

 Anyways, i've attached the file colocations.py. The expected output is a
List of rules (prunedRulesList).These rules are themselves lists.e.g
['ab','c'] denotes the rule ab=>c.
Please do have a look if you have time :).
>
> --
> bruno desthuilliers
> python -c "print '@'.join(['.'.join([w[::-1] for w in p.split('.')]) for
> p in '[EMAIL PROTECTED]'.split('@')])"
> --
> http://mail.python.org/mailman/listinfo/python-list
>
#convert the input file into a dictionary (global ids mapped to feature type) 
and a list of global id pairs
def get_colocations(filename):
    lines = open(filename).read().split("\n")
    colocnDict = {}
    for line in lines:
        n1, b1, n2, b2 = line.split(",")
        n1 = int(n1)
        n2 = int(n2)
        a1 = b1.strip("'")
        a2 = b2.strip("'")
        colocnDict[n1] = a1
        colocnDict[n2] = a2        
    return colocnDict
    
#get pairs of feature ids which are colocated
def getPairs(filename):
    lines = open(filename).read().split("\n")
    pairList = []
    for line in lines:
        n1, b1, n2, b2 = line.split(",")
        pairList.append([n1, n2])
    return pairList    
        
#count number of occurences of each feature in the feature list and store in 
featueCountDict
def getFeatureCount():
    colocnDict = get_colocations("colocations.txt")
    featureList= colocnDict.values()
    featureListUnique = []
    [featureListUnique.append(word) for word in featureList if not 
featureListUnique.count(word)]
    featureCountDict = {}
    for feature in featureListUnique:
        featureCountDict[feature] = featureList.count(feature)
    return featureCountDict

def k2k1(string1, string2):
    for c in string1:
        string2 = string2.replace(c,"",1)
    if len(string2) == 1:
        string1 += string2
    else:
        pass
    return string1

def dictInvert(d):
    dictInv = {}
    for k, v in d.iteritems():
        keys = dictInv.setdefault(v, [])
        keys.append(k)
    return dictInv


#Generate candidate co-locations of size k+1 from size k
#get lower level subsets and prune it by antimonotone property
def genColocations(prunedK):
    prunedNew = substringList = []
    for string1 in prunedK:
        for string2 in prunedK:
            k = len(string2)
            if string1 != string2:
                string1 = k2k1(string1, string2)
                if len(string1) == k+1:
                    prunedNew.append(string1)

            for s in prunedNew:
                substrings = [s[:i]+s[i+1:] for i in range(len(s))]             
   
                for string in substrings:
                    if string not in prunedK:
                        prunedNew.remove(s)
                        continue
                    continue
                    

#tableInstancesNew is a dictionary with keys as k level colocations and values 
as table instances
def genTableInstances(prunedNew,tableInstancesK):
     colocnDict = get_colocations("colocations.txt")
     tableInstancesNew = {}

     for s in p:
         substring1 =  s[:len(s)-1]
         substring2 = s[:len(s)-2]+s[len(s)-1:]      #get 2 substrings
         list1 = tableInstancesK(substring1)
         list2 = tableInstancesK(substring2)
         stringList1 = list(substring1)
         stringList2 = list(substring2)
         inter1 = filter(lambda x:x in stringList1,stringList2)
         indexList1 = indexList2 = []
         i=j=0
         for char in inter1: #get positions of char. of inter1 in string1/2, 
then corr. positions in list1/2
             i = substring1.index(char)
             j = substring2.index(char)
             indexList1.append(i)
             indexList2.append(j)
             continue
             char1 = filter(lambda x:x not in inter1,stringList1)
             char2 = filter(lambda x:x not in inter1,stringList2)
             charList = []
             charList.append(char1[0])
             charList.append(char2[0])
             pairList = getPairs("colocations.txt")

             for ti1 in list1:
                 for ti2 in list2:
                     for i in range(0,len(indexList1)-1):
                         index1 = indexList1(i)
                         index2 = indexList2(j)
                         if j < len(indexList2)-1 & ti1[index1] == ti2[index2]:
                             i+=1
                             j+=1
                             continue
                         elif i == len(indexList2)-1 & j == len(indexList2)-1 & 
ti1[index1] == ti2[index2]:
                            if charList in pairList:
                                k = string2.index(char2[0])
                                instance = ti2(k)
                                tiNew = ti1.append(instance)
                                tiNewList = []
                                tiNewList.append(tiNew)
                            else:
                                break
                         else:
                             break
                 tableInstancesNew[s] = tiNewList
                 continue
     return tableInstancesNew

#Storing tableInstances and their corresponding counts
def tiCount(tableInstancesNew):
    for k,v in tableInstancesNew.iteritems():
        tiCountdict[k] = len(v)
    return tiCountDict
    
#finding participation index of a table instance
#pr=count(no. of distinct instances present  in the colocation) /count(feature)
def findPI(tableInstancesNew):
    pr = prList = []
    i=j=k=0
    colocnDict = get_colocations("colocations.txt")
    for tableInstance in tableInstancesNew.values():
        length = len(tableInstance(0))-1
        for i  in range(0,length,1):
            for rowInstance in tableInstance:
                prList.append(rowInstance[i:i+1])
                continue
            prListUnique = []
            [prListUnique.append(word) for word in prList if not 
prListUnique.count(word)]
            featureCountDict = getFeatureCount()
            if rowInstance[i] in featureCountDict.keys():
                featureCount = featureCountDict(rowInstance[i])
                pr[j] = round(len(prListUnique)*1.0 / featureCount,2)  
                prList = []
                j+=1
                i+=1
                continue  
            pi[k] = min(pr)
            k+=1
            continue
        tiDict = {}
        tiDict = dict(zip(tableInstancesNew.keys(), pr))  #check order
    return tiDict

#Pruning based on participation index
def pruneTI(tiDict,pi):
    for ti in tiDict:
        if tiDict[ti] >= pi:
            prunedDict[ti] = tiDict[ti]
            prunedFinal = prunedDict.keys()
    return prunedFinal


#generates rules list of a colocation. e.g.'abc' gives ['ab','c'] which denotes 
the rule ab => c
def genRulesList(colocation):
    rulesList= [ ]
    length = len(colocation)
    i=0
    colocnList = []
    colocnList.append(colocation)
    while colocNew != colocation:
        colocNew =  colocation +  colocation[0]
        colocNew = colocNew[1:len(colocNew)]
        colocnList.append(colocNew)
        continue
    for colocn in colocnList:
        for j in range(0,length - 1,1):
            rule1 = [colocn[i:i+j],colocn[i+j:length]]
            rule2 = [colocn[i+j:length],colocn[i:i+j]]
            rulesList.append(rule1)
            rulesList.append(rule2)
            continue
        continue
    return rulesList

#generate the colocation rules,rulesDict maps each colocation to its rules list
def genRules(prunedFinal):
    rulesDict = {}
    for colocation in prunedFinal:
        rulesDict[colocation] =  genRulesList(colocation)  
    return rulesDict

#finds CP of a rule list and maps each rule to its cp
def findCP(rulesDict):
    cpDict = {}
    tiCountDict = tiCount(tableInstancesNew)
    for rulesList in rulesDict.values():
        for rule in rulesList:
            string = " ".join(rule)
            list1 = tiCountDict[string]
            count1 = len(list1)
            colocn = rule[0]
            list2 = tiCountDict[colocn]   
            count2 =  len(list2)
            cpDict[rule] = round(count1*1.0 / count2,2)
    return cpDict

#prunes the rules on basis of cp
def pruneCP(cpDict,cp):
    for rule in rulesDict:
        if rulesDict[subset] >= cp:
            prunedRulesDict[ti] = rulesDict[ti]            
            prunedRulesFinal = prunedRulesDict.keys()
            prunedRulesList = []
            prunedRulesList.append(prunedRulesFinal)
    return prunedRulesList

                   
#generate 2-sized tableInstancesK and  prunedK = prunedFinal from colocnDict
def colocationMiner(pi,cp):
        colocationSet = {}
        featureCountDict = getFeatureCount()
        tiCountDict = featureCountDict
        featureList = featureCountDict.keys()
        prunedK = []
        
        for i in range(0,len(featureList) - 1,1):
            for k in range(1,len(featureList),1):
                if i+k <= len(featureList)-1: 
                    colocn = featureList[i] + featureList[i+k]
                    prunedK.append(colocn)
        prunedFinal = prunedK

        colocnDict = get_colocations("colocations.txt")
        tiDict1 = dictInvert(colocnDict)
        tiDict2 = {}
        for k1,v1 in tiDict1.iteritems():
            for k2,v2 in tiDict1.iteritems():
                if k1 != k2  and not tiDict2.has_key(k2 + k1):
                    tiDict2[k1 + k2] = [v1] + [v2]
        l = tiDict2.values()
        for value in l:
            l1 = value[0]
            l2 = value[1]
            l3 = l4 = l5 = []
            for i in range(0,len(l1),1):
                for j in range(0,len(l2),1):
                    l3.append([l1[i],l2[j]])
            l4.append(l3)
        pairList = getPairs("colocations.txt")
        for element in l4:
            for pair in element:
                if pair not in pairList:
                    element.remove(pair)
                l5.append(element)
            tableInstancesK = dict(zip(tiDict2.keys(), l5))   #check order
            for k,v in tableInstancesK.iteritems():
                tiCountDict[k] = len(v)

        while prunedFinal != []:
            prunedNew = genColocations(prunedK)
            tableInstancesNew = genTableInstances(prunedNew,tableInstancesK)
            tiCountDict = tiCount(tableInstancesNew)
            tiDict = findPI(tableInstancesNew)
            prunedFinal = pruneTI(tiDict,pi)
            rulesDict = genRules(prunedFinal)
            cpDict = findCP(rulesDict)
            prunedRulesList = pruneCP(cpDict,cp)
            prunedK = prunedFinal
            tableInstancesK = tableInstancesNew
        else:
            return prunedRulesList
        print "List of colocation rules is ",prunedRulesList
        print tiCountDict
        
-- 
http://mail.python.org/mailman/listinfo/python-list

Reply via email to