I did some more experimenting and came up with the code below. It shows several methods. When run, the script tests the robustness of each method (roughly), and profiles it using timeit. The results from running on my laptop are shown below the code.
<code> seqs = [# Original: [0xF0, 1, 2, 3, 0xF0, 4, 5, 6, 0xF1, 7, 8, 0xF2, 9, 10, 11, 12, 13, 0xF0, 14, 0xF1, 15], # Single entry: [0xF0, 1, 2, 3], # empty [], # No values with 0x80 set [1, 2, 3, 14, 15], # Does not start with a value that has 0x80 set [1, 2, 3, 14, 15, 0xF0, 1, 2, 3]] expected = [# Original: [[0xF0, 1, 2, 3], [0xF0, 4, 5, 6], [0xF1, 7, 8], [0xF2, 9, 10, 11, 12, 13], [0xF0, 14], [0xF1, 15]], # Single entry: [[0xF0, 1, 2, 3]], # empty [], # No values with 0x80 set [], # Does not start with a value that has 0x80 set [[0xF0, 1, 2, 3]]] def gengroups0(seq): group = None for val in seq: if val & 0x80: if group: yield group group = [] try: group.append(val) except AttributeError: pass if group: yield group def getgroups0(seq): groups = [] group = None for val in seq: if val & 0x80: if group: groups.append(group) group = [] try: group.append(val) except AttributeError: pass if group: groups.append(group) return groups def gengroups1(seq): idxs = [i for i,v in enumerate(seq) if v&0x80] for i,j in zip(idxs,idxs[1:]+[None]): yield seq[i:j] def getgroups1(seq): idxs = [i for i,v in enumerate(seq) if v&0x80] return [seq[i:j] for i,j in zip(idxs,idxs[1:]+[None])] # Similar to the partition method on strings def partition(seq,f=None): if f is None: f = lambda x:x for i,v in enumerate(seq): if f(v): return seq[:i],[seq[i]],seq[i+1:] return seq,[],[] def rpartition(seq, f=None): if f is None: f = lambda x:x for i,v in zip(range(len(seq)-1,-1,-1),seq[::-1]): if f(v): return seq[:i],[seq[i]],seq[i+1:] return ([],[],seq) def gengroups2(seq): while seq: seq, key, val = rpartition(seq, lambda x: x&0x80) if key and val: yield key+val def getgroups2(seq): groups = [] while seq: seq, key, val = rpartition(seq, lambda x: x&0x80) if key and val: groups.append(key+val) return groups def getgroups3(seq): groups = [] for i in seq: if 0x80 & i: groups.append([i]) else: groups[-1].append(i) return [x for x in groups if x] seq = seqs[0] if __name__ == "__main__": from timeit import Timer import __main__ for i in range(4): fname = "getgroups"+str(i) f = getattr(__main__,fname) print fname for i,(s,e) in enumerate(zip(seqs,expected)): print "test %d:"%i, try: if f(s) == e: print "pass" else: print "fail" except: print "error" t = Timer(fname+'(seq)', 'from __main__ import seq,'+fname) print "%.2f usec/pass" % (1000000 * t.timeit(number=100000)/ 100000) print </code> Output from running the above: getgroups0 test 0: pass test 1: pass test 2: pass test 3: pass test 4: pass 14.85 usec/pass getgroups1 test 0: pass test 1: pass test 2: pass test 3: pass test 4: pass 13.81 usec/pass getgroups2 test 0: fail test 1: pass test 2: pass test 3: pass test 4: pass 56.38 usec/pass getgroups3 test 0: pass test 1: pass test 2: pass test 3: error test 4: error 16.23 usec/pass `getgropus2' fails test 0 because it produces a reversed list. That can easily be fixed by re-reversing the output before returning. But, since it is by far the slowest method, I didn't bother. `getgroups3' is a method I got from another post in this thread, just for comparison. >From my benchmarks it looks like getgroups1 is the winner. I didn't scour the thread to test all the methods however. -- http://mail.python.org/mailman/listinfo/python-list