Hello everyone! Hopefully this will interest some of you. I have a csv file (can be downloaded from http://www.paulstathamphotography.co.uk/45.txt) which has five fields separated by ~ delimiters. To read this I've been using a csv.DictReader, which works in 99% of the cases. Occasionally, however, the description field has errant \r\n characters in the middle of the record. This causes the reader to assume it's a new record and try to read it.

Here's the code I had:

import csv

fields = ["PROGTITLE", "SUBTITLE", "EPISODE", "DESCRIPTION", "DATE"]
delim = '~'

lineReader = csv.DictReader(open('45.txt', 'rbU'), delimiter=delim,fieldnames=fields)

def FormatDate(date):
    return date[6:10] +"-" +date[3:5] + "-" +date[0:2]

channelPrograms = []

for row in lineReader:
    row["DATE"] = FormatDate(row["DATE"])
    channelPrograms.append(row)

Which when run would give me an error, as it was trying to pass a NoneType to the FormatDate method, which obviously couldn't handle it. I'd like to find a way to read this record correctly despite the \r \n's in the middle of the description. The problem is I can't change the behaviour in which it reads a record.

For the moment I've had to resort to extending the csv.DictReader and overriding the next() method to record the number of fields versus the number of values; if they're not equal I don't add those lines to my list of records.

import csv

class ChanDictReader(csv.DictReader):
    def __init__(self, f, fieldnames=None, restkey=None, restval=None,
                 dialect="excel", *args, **kwds):
        csv.DictReader.__init__(self, f, fieldnames, restkey, restval,
                                dialect, *args, **kwds)
        self.lf = 0
        self.lr = 0

    def next(self):
        if self.line_num == 0:
            # Used only for its side effect.
            self.fieldnames
        row = self.reader.next()
        self.line_num = self.reader.line_num

        # unlike the basic reader, we prefer not to return blanks,
        # because we will typically wind up with a dict full of None
        # values
        while row == []:
            row = self.reader.next()
        d = dict(zip(self.fieldnames, row))
        self.lf = len(self.fieldnames)
        self.lr = len(row)
        if self.lf < self.lr:
            d[self.restkey] = row[self.lf:]
        elif self.lf > self.lr:
            for key in self.fieldnames[self.lr:]:
                d[key] = self.restval
        return d

fields = ["PROGTITLE", "SUBTITLE", "EPISODE", "DESCRIPTION", "DATE"]
delim = '~'

lineReader = ChanDictReader(open('45.txt', 'rbU'), delimiter=delim,fieldnames=fields)

def FormatDate(date):
    return date[6:10] +"-" +date[3:5] + "-" +date[0:2]

channelPrograms = []

for row in lineReader:
    print "Number of fields: " + str(lineReader.lf) + " Number of values: " + str(lineReader.lr)
    if lineReader.lf == lineReader.lr:
        row["DATE"] = FormatDate(row["DATE"])
        channelPrograms.append(row)

Anyone have any ideas? :o)

Paul
--
http://mail.python.org/mailman/listinfo/python-list