Hi all, I am having trouble appending more data to an existing Avro data file.
Environment: Python 2.6.5, avro-1.3.3-py2.6. The program looks like this: from avro import schema, datafile, io OUTFILE_NAME = 'sample.avro' SCHEMA_STR = """{ "type": "record", "name": "bkSampleAvro", "namespace": "bk_avro_example", "fields": [ { "name": "name" , "type": "string" }, { "name": "age" , "type": "int" }, { "name": "address", "type": "string" }, { "name": "value" , "type": "long" } ] }""" SCHEMA = schema.parse(SCHEMA_STR) def write_avro_file(): # Lets generate our data data = {} data['name'] = 'Foo' data['age'] = 19 data['address'] = '10, Bar Eggs Spam' data['value'] = 800 rec_writer = io.DatumWriter(SCHEMA) df_writer = datafile.DataFileWriter( open(OUTFILE_NAME, 'ab'), rec_writer, writers_schema = SCHEMA, codec = 'deflate' ) df_writer.append(data) df_writer.close() def read_avro_file(): rec_reader = io.DatumReader() df_reader = datafile.DataFileReader( open(OUTFILE_NAME, "rb"), rec_reader ) for record in df_reader: print record['name'], record['age'] print record['address'], record['value'] if __name__ == '__main__': # Write an AVRO file first write_avro_file() write_avro_file() # Now, read it read_avro_file() The result looks like this: Foo 19 10, Bar Eggs Spam 800 Traceback (most recent call last): File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", line 124, in <module> read_avro_file() File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", line 112, in read_avro_file for record in df_reader: File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/datafile.py", line 318, in next datum = self.datum_reader.read(self.datum_decoder) File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", line 411, in read return self.read_data(self.writers_schema, self.readers_schema, decoder) File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", line 456, in read_data return self.read_record(writers_schema, readers_schema, decoder) File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", line 648, in read_record field_val = self.read_data(field.type, readers_field.type, decoder) File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", line 434, in read_data return decoder.read_utf8() File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", line 210, in read_utf8 return unicode(self.read_bytes(), "utf-8") UnicodeDecodeError: 'utf8' codec can't decode bytes in position 14-15: invalid data If I remove the second write_avro_file() call, then everything is fine. How do I properly append more data to the file? Thanks, Felix