Dear all, I am new to this world, so I apologize if this is a trivial question.
I have created a serialized Avro file using C++. When I try to read the file using python I get an error. I am able to read the file in C++ correctly. The details are: SCHEMA [ { "type" : "record", "namespace" : "Results", "name" : "Statistics", "fields" : [ { "name" : "pcketsGenerated" , "type" : "int" }, { "name" : "avdDelay" , "type" : "double" } ] }, { "type" : "record", "namespace" : "Results", "name" : "PathStatistics", "fields" : [ { "name" : "src" , "type" : "int" }, { "name" : "dst" , "type" : "int" }, { "name" : "statistics" , "type" : "Results.Statistics"}, { "name" : "flowStatVec" , "type" : {"type":"array", "items": "Results.Statistics"}} ] }, { "type" : "record", "namespace" : "Results", "name" : "SimResults", "fields" : [ { "name" : "NetSize" , "type" : "int" }, { "name" : "ItResVec" , "type" : { "type": "array", "items" : { "type" : "record", "namespace":"Results", "name" : "itResults", "fields" : [ {"name" : "PathStatVec", "type" : { "type": "array", "items": "Results.PathStatistics"}} ] } }} ] } ] ERROR {'NetSize': 2, 'ItResVec': [{'PathStatVec': []}, {'PathStatVec': []}, {'PathStatVec': []}, {'PathStatVec': []}]} ----------------------- {'pcketsGenerated': 0, 'avdDelay': 1.67723666e-316} ----------------------- {'pcketsGenerated': 0, 'avdDelay': 1.37603e-318} ----------------------- {'pcketsGenerated': 0, 'avdDelay': 7.905e-321} ----------------------- Traceback (most recent call last): File "./process.py", line 12, in <module> for steps in reader: File "/usr/lib/python3/dist-packages/avro/datafile.py", line 526, in __next__ datum = self.datum_reader.read(self.datum_decoder) File "/usr/lib/python3/dist-packages/avro/io.py", line 481, in read return self.read_data(self.writer_schema, self.reader_schema, decoder) File "/usr/lib/python3/dist-packages/avro/io.py", line 524, in read_data return self.read_union(writer_schema, reader_schema, decoder) File "/usr/lib/python3/dist-packages/avro/io.py", line 686, in read_union raise 
SchemaResolutionException(fail_msg, writer_schema, reader_schema) avro.io.SchemaResolutionException: Can't access branch index 4 for union with 3 branches Writer's Schema: [ { "type": "record", "name": "Statistics", "namespace": "Results", "fields": [ { "type": "int", "name": "pcketsGenerated" }, { "type": "double", "name": "avdDelay" } ] }, { "type": "record", "name": "PathStatistics", "namespace": "Results", "fields": [ { "type": "int", "name": "src" }, { "type": "int", "name": "dst" }, { "type": "Results.Statistics", "name": "statistics" }, { "type": { "type": "array", "items": "Results.Statistics" }, "name": "flowStatVec" } ] }, { "type": "record", "name": "SimResults", "namespace": "Results", "fields": [ { "type": "int", "name": "NetSize" }, { "type": { "type": "array", "items": { "type": "record", "name": "itResults", "namespace": "Results", "fields": [ { "type": { "type": "array", "items": "Results.PathStatistics" }, "name": "PathStatVec" } ] } }, "name": "ItResVec" } ] } ] Reader's Schema: [ { "type": "record", "name": "Statistics", "namespace": "Results", "fields": [ { "type": "int", "name": "pcketsGenerated" }, { "type": "double", "name": "avdDelay" } ] }, { "type": "record", "name": "PathStatistics", "namespace": "Results", "fields": [ { "type": "int", "name": "src" }, { "type": "int", "name": "dst" }, { "type": "Results.Statistics", "name": "statistics" }, { "type": { "type": "array", "items": "Results.Statistics" }, "name": "flowStatVec" } ] }, { "type": "record", "name": "SimResults", "namespace": "Results", "fields": [ { "type": "int", "name": "NetSize" }, { "type": { "type": "array", "items": { "type": "record", "name": "itResults", "namespace": "Results", "fields": [ { "type": { "type": "array", "items": "Results.PathStatistics" }, "name": "PathStatVec" } ] } }, "name": "ItResVec" } ] } ] C++ code : Write and read #include "schema.hh" #include "avro/Encoder.hh" #include "avro/Decoder.hh" #include "avro/ValidSchema.hh" #include "avro/Compiler.hh" 
#include "avro/DataFile.hh"
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Compiles the JSON schema stored in `filename` into an avro::ValidSchema.
//
// Throws std::runtime_error if the file cannot be opened, so a wrong path is
// reported as such instead of being handed to the schema compiler as an
// unreadable stream.
avro::ValidSchema loadSchema(const char* filename)
{
    std::ifstream ifs(filename);
    if (!ifs) {
        throw std::runtime_error(std::string("cannot open schema file: ") + filename);
    }
    avro::ValidSchema result;
    avro::compileJsonSchema(ifs, result);
    return result;
}

// Writes one SimResults datum to "test.bin", then reads it back and prints
// the pcketsGenerated value of every flow statistic.
int main()
{
    // Dimensions of the generated test data; the read-back loops below use the
    // same values to index what was written.
    const int kIterations = 2;
    const int kNodes = 2;
    const int kFlowsPerPath = 3;

    avro::ValidSchema resultsSchema = loadSchema("schema.avdl");

    // NOTE(review): the top-level type in schema.avdl is a union of three
    // records, while a bare SimResults is written below. Presumably no union
    // branch index is encoded in front of the record, which would explain the
    // "Can't access branch index" error in the Python reader. Confirm against
    // the Avro specification and the generated schema.hh.
    avro::DataFileWriter<SimResults> dfw("test.bin", resultsSchema);

    SimResults net;
    net.NetSize = 2;

    // Running counter so that every Statistics record gets a distinct value.
    int i = 0;
    for (int it = 0; it < kIterations; ++it) {
        itResults ir;
        for (int src = 0; src < kNodes; ++src) {
            for (int dst = 0; dst < kNodes; ++dst) {
                Statistics gstat;
                gstat.pcketsGenerated = i;
                gstat.avdDelay = i;

                PathStatistics pst;
                pst.src = src;
                pst.dst = dst;
                pst.statistics = gstat;
                i++;

                for (int flow = 0; flow < kFlowsPerPath; ++flow) {
                    Statistics stFlow;
                    stFlow.pcketsGenerated = i;
                    stFlow.avdDelay = i;
                    pst.flowStatVec.push_back(stFlow);
                    i++;
                }
                ir.PathStatVec.push_back(pst);
            }
        }
        net.ItResVec.push_back(ir);
    }
    dfw.write(net);
    dfw.close();

    avro::DataFileReader<SimResults> dfr("test.bin", resultsSchema);
    SimResults net2;
    dfr.read(net2);
    std::cout << net2.NetSize << std::endl;

    // size() is unsigned, so iterate with std::size_t and narrow only for the
    // "%d" conversion in the (unchanged) format string.
    for (std::size_t it = 0; it < net2.ItResVec.size(); ++it) {
        const itResults& itr1 = net2.ItResVec[it];
        for (int src = 0; src < kNodes; ++src) {
            for (int dst = 0; dst < kNodes; ++dst) {
                // Paths were appended in (src, dst) order, kNodes entries per src.
                const PathStatistics& pvst1 = itr1.PathStatVec[src * kNodes + dst];
                for (int flow = 0; flow < kFlowsPerPath; ++flow) {
                    printf("it: %d, src: %d, dst: %d, flow: %d, stat: %d\n",
                           static_cast<int>(it), src, dst, flow,
                           pvst1.flowStatVec[flow].pcketsGenerated);
                }
            }
        }
    }
    return 0;
}

PYTHON code: Only read

#!/usr/bin/python3
from avro import schema, datafile, io

schema = schema.Parse(open("schema.avdl", "r").read())
reader = datafile.DataFileReader(open("test.bin", "rb"), io.DatumReader(reader_schema=schema))
for steps in reader:
    print (steps)
    print ("-----------------------")

Thanks for your comments

--
Sent from: http://apache-avro.679487.n3.nabble.com/Avro-Users-f679479.html