def consolidate(sets): # http://rosettacode.org/wiki/Set_consolidation#Python:_Iterative setlist = [s for s in sets if s] for i, s1 in enumerate(setlist): if s1: for s2 in setlist[i+1:]: intersection = s1.intersection(s2) if intersection: s2.update(s1) s1.clear() s1 = s2 return [s for s in setlist if s]
def wrapper(seqs): consolidated = consolidate(map(set, seqs)) groupmap = {x: i for i,seq in enumerate(consolidated) for x in seq} output = {} for seq in seqs: target = output.setdefault(groupmap[seq[0]], []) target.append(seq) return list(output.values()) with open("testing1.txt", "r") as myfile: content = myfile.readlines() gr = [['']] for ii in range(0,500): try: gr = [[content[ii]]] + gr except: print "error" + str(content[ii]) #groups = wrapper(content) for i, group in enumerate(wrapper(gr)): print('g{}:'.format(i), group) print("\n") Traceback (most recent call last): File "<stdin>", line 10, in <module> File "<stdin>", line 2, in wrapper File "<stdin>", line 7, in consolidate AttributeError: 'str' object has no attribute 'intersection' >>> content[1] '1,[(-1, 1, -2), (2, -1/2, 1)]\n' >>> content[2] '0,[(1, 0, 0)]\n' >>> content[3] '1,[(1, 0, 1)]\n' >>> content[4] '1,[]\n' >>> content[5] '1,[]\n' then i try to edit to gr = [[set(content[ii])]] + gr it return unhashable type 'set' just would like to search all repeatable pattern for a group of lines -- https://mail.python.org/mailman/listinfo/python-list