Just playing around - this doesn't work with Python 3.0 due to the lack of pattern binding, i.e. tuple unpacking in function parameters as used by to_float below (which I think is coming back in 3.1?).
>>> from collections import defaultdict >>> from itertools import count >>> def mkdict(): return defaultdict(count().next) ... >>> converters = defaultdict(mkdict) >>> def to_float((i, s)): ... try: return float(s) ... except: return converters[i][s] ... >>> def convert(line): ... return map(to_float, zip(count(), line.split(','))) ... >>> convert('1,2,red') [1.0, 2.0, 0] >>> convert('1,2,red') [1.0, 2.0, 0] >>> convert('1,2,blue') [1.0, 2.0, 1] >>> convert('1,2,blue,blue') [1.0, 2.0, 1, 0] andrew cooke wrote: > Carl Banks wrote: >> import collections >> import itertools >> >> def createInitialCluster(fileName): >> fixedPoints = [] >> # quantization is a dict that assigns sequentially-increasing >> numbers >> # to values when reading keys that don't yet exist >> quantization = defaultdict.collections(itertools.count().next) >> with open(fileName, 'r') as f: >> for line in f: >> dimensions = [] >> for s in line.rstrip('\n').split(","): >> if isNumeric(s): >> dimensions.append(float(s)) >> else: >> dimensions.append(float(quantization[s])) >> fixedPoints.append(Point(dimensions)) >> return Cluster(fixedPoints) > > nice reply (i didn't know defaultdict worked like that - very neat). > > two small things i noticed: > > 1 - do you need a separate quantization for each column? the code above > might give, for example, non-contiguous ranges of integers for a > particular column if a string occurs ("by accident" perhaps) in more than > one. > > 2 - don't bother with isNumeric. just return the cast value or catch the > exception: > > [...] > try: > dimensions.append(float(s)) > except: > dimensions.append(float(quantization[s])) > > (not sure float() is needed there either if you're using a recent version > of python - only reason i can think of is to avoid integer division in > older versions). > > andrew > > > -- > http://mail.python.org/mailman/listinfo/python-list > > -- http://mail.python.org/mailman/listinfo/python-list