Hi all,

I am having trouble adding more data into a file.

Environment: Python 2.6.5, avro-1.3.3-py2.6

Program looks like this

from avro import schema, datafile, io

OUTFILE_NAME = 'sample.avro'

SCHEMA_STR = """{
    "type": "record",
    "name": "bkSampleAvro",
    "namespace": "bk_avro_example",
    "fields": [
        {   "name": "name"   , "type": "string"   },
        {   "name": "age"    , "type": "int"      },
        {   "name": "address", "type": "string"   },
        {   "name": "value"  , "type": "long"     }
    ]
}"""

SCHEMA = schema.parse(SCHEMA_STR)
def write_avro_file():
    # Lets generate our data
    data = {}
    data['name']    = 'Foo'
    data['age']     = 19
    data['address'] = '10, Bar Eggs Spam'
    data['value']   = 800

    rec_writer = io.DatumWriter(SCHEMA)

    df_writer = datafile.DataFileWriter(
                    open(OUTFILE_NAME, 'ab'),
                    rec_writer,
                    writers_schema = SCHEMA,
                    codec = 'deflate'
                )

    df_writer.append(data)

    df_writer.close()

def read_avro_file():
    rec_reader = io.DatumReader()

    df_reader = datafile.DataFileReader(
                    open(OUTFILE_NAME, "rb"),
                    rec_reader
                )

    for record in df_reader:
        print record['name'], record['age']
        print record['address'], record['value']


if __name__ == '__main__':
    # Write an AVRO file first
    write_avro_file()
    write_avro_file()

    # Now, read it
    read_avro_file()


The result looks like

Foo 19
10, Bar Eggs Spam 800
Traceback (most recent call last):
  File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", line
124, in <module>
    read_avro_file()
  File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", line
112, in read_avro_file
    for record in df_reader:
  File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/datafile.py",
line 318, in next
    datum = self.datum_reader.read(self.datum_decoder)
  File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 411, in read
    return self.read_data(self.writers_schema, self.readers_schema, decoder)
  File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 456, in read_data
    return self.read_record(writers_schema, readers_schema, decoder)
  File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 648, in read_record
    field_val = self.read_data(field.type, readers_field.type, decoder)
  File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 434, in read_data
    return decoder.read_utf8()
  File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 210, in read_utf8
    return unicode(self.read_bytes(), "utf-8")
UnicodeDecodeError: 'utf8' codec can't decode bytes in position 14-15:
invalid data



if I remove the second write_avro_file() call then everything is fine.  How
to properly append more data into the file?

Thanks,

Felix

Reply via email to