Hey Felix,
See the test_append() function at
http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_datafile.py?view=markup
.
Regards,
Jeff
On Wed, Dec 22, 2010 at 4:59 PM, felix gao gre1...@gmail.com wrote:
Hi all,
I am having trouble appending more data to an existing Avro file.
Environment: Python 2.6.5, avro-1.3.3-py2.6
The program looks like this:
from avro import schema, datafile, io
# Path of the Avro container file that the script writes and then reads back.
OUTFILE_NAME = 'sample.avro'

# Avro schema for the sample record, kept as JSON text because it is handed
# to schema.parse() below. The keys and type names must be quoted JSON
# strings; written as bare names they would be undefined Python identifiers.
SCHEMA_STR = """{
    "type": "record",
    "name": "bkSampleAvro",
    "namespace": "bk_avro_example",
    "fields": [
        { "name": "name", "type": "string" },
        { "name": "age", "type": "int" },
        { "name": "address", "type": "string" },
        { "name": "value", "type": "long" }
    ]
}"""

# Parsed schema object shared by the writer code.
SCHEMA = schema.parse(SCHEMA_STR)
def write_avro_file():
    """Write one hard-coded sample record to OUTFILE_NAME.

    Each call opens OUTFILE_NAME in binary append mode ('ab'), builds a new
    DataFileWriter with an explicit writers_schema and the 'deflate' codec,
    appends a single record and closes the writer.

    NOTE(review): passing writers_schema presumably makes DataFileWriter
    start a fresh container (header included) on every call, so a second
    call against the same 'ab' file would leave a header in the middle of
    the data. Confirm against avro.datafile; this looks like the cause of
    the read failure shown in the traceback.
    """
    # The sample record to store.
    sample = {
        'name': 'Foo',
        'age': 19,
        'address': '10, Bar Eggs Spam',
        'value': 800,
    }

    datum_writer = io.DatumWriter(SCHEMA)
    out_file = open(OUTFILE_NAME, 'ab')
    container = datafile.DataFileWriter(
        out_file,
        datum_writer,
        writers_schema=SCHEMA,
        codec='deflate'
    )
    container.append(sample)
    container.close()
def read_avro_file():
    """Print every record stored in OUTFILE_NAME.

    Opens the file in binary read mode, iterates over the records through a
    DataFileReader and prints two lines per record: name and age, then
    address and value. The reader is closed even if decoding a record
    raises.
    """
    rec_reader = io.DatumReader()
    df_reader = datafile.DataFileReader(
        # The mode must be the string 'rb'; a bare rb is an undefined name.
        open(OUTFILE_NAME, 'rb'),
        rec_reader
    )
    try:
        for record in df_reader:
            # A single pre-formatted string prints the same text as the
            # Python 2 statement "print a, b" and also parses on Python 3.
            print('%s %s' % (record['name'], record['age']))
            print('%s %s' % (record['address'], record['value']))
    finally:
        # Release the underlying file handle on success and on failure.
        df_reader.close()
if __name__ == '__main__':
    # Write the sample record twice in a row; the second call targets the
    # file that the first call has already created.
    write_avro_file()
    write_avro_file()
    # Read the file back and print its records.
    read_avro_file()
The result looks like this:
Foo 19
10, Bar Eggs Spam 800
Traceback (most recent call last):
  File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", line 124, in <module>
    read_avro_file()
  File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", line 112, in read_avro_file
    for record in df_reader:
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/datafile.py", line 318, in next
    datum = self.datum_reader.read(self.datum_decoder)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", line 411, in read
    return self.read_data(self.writers_schema, self.readers_schema, decoder)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", line 456, in read_data
    return self.read_record(writers_schema, readers_schema, decoder)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", line 648, in read_record
    field_val = self.read_data(field.type, readers_field.type, decoder)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", line 434, in read_data
    return decoder.read_utf8()
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", line 210, in read_utf8
    return unicode(self.read_bytes(), "utf-8")
UnicodeDecodeError: 'utf8' codec can't decode bytes in position 14-15: invalid data
If I remove the second write_avro_file() call, everything works fine. How
do I properly append more data to the file?
Thanks,
Felix