[ https://issues.apache.org/jira/browse/AVRO-1176?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Thiruvalluvan M. G. updated AVRO-1176: -------------------------------------- Resolution: Fixed Status: Resolved (was: Patch Available) Merged Pull Request > ResolvingDecoder fails to resolve or parse schemas > -------------------------------------------------- > > Key: AVRO-1176 > URL: https://issues.apache.org/jira/browse/AVRO-1176 > Project: Apache Avro > Issue Type: Bug > Components: c++ > Affects Versions: 1.7.0 > Reporter: Keh-Li Sheng > Assignee: Thiruvalluvan M. G. > Priority: Major > Labels: patch > Fix For: 1.9.0 > > Attachments: AVRO-1176.patch > > > We have encountered a number of problems using ResolvingDecoder in the C++ > project that we can trace to: > 1. Incorrectly swapped reader/writer arguments passed to > ResolvingGrammarGenerator::generate() > 2. Using the wrong tree in ResolvingGrammarGenerator::generate() to generate > the backup parsing stack > 3. A decoder has no "hook" into the generated codec_traits decode methods for > Specific that advances the resolved parse tree through the Symbol::sSkipStart > nodes to ignore extra or unknown fields in the writer's data. > 4. A resolving decoder can generate a valid decoded object even if there are > garbage characters at the end of the input stream if those characters appear > in a field that the reader schema is unaware of. > Reader/writer schemas that fail to parse properly are shown below. 
First example is the > writer adding a field to a record that is inside an array > {code:title=Added field to record inside array} > { > std::string > readerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}"); > std::string > writerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}}}]}"); > std::stringstream readerStream(readerString); > std::stringstream writerStream(writerString); > > avro::ValidSchema readerSchema; > avro::ValidSchema writerSchema; > > avro::compileJsonSchema(readerStream, readerSchema); > avro::compileJsonSchema(writerStream, writerSchema); > > avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, > readerSchema, avro::jsonDecoder(writerSchema)); > struct Outer outer; > > std::stringstream > jsonStream("{\"outerArray\":[{\"first\":{\"field\":\"here is a string > field\"},\"second\":{\"field\":\"here is another string > field\"},\"third\":{\"number\":3}},{\"first\":{\"field\":\"cool\"},\"second\":{\"field\":\"beans\"},\"third\":{\"number\":4}}]}"); > std::auto_ptr<avro::InputStream> input = > avro::istreamInputStream(jsonStream); > decoder->init(*input); > 
avro::decode(*decoder, outer); > } > {code} > {code:title=Additional array of writer-only record} > { > std::string > readerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}"); > std::string > writerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}"); > std::stringstream readerStream(readerString); > std::stringstream writerStream(writerString); > > avro::ValidSchema readerSchema; > avro::ValidSchema writerSchema; > > avro::compileJsonSchema(readerStream, readerSchema); > avro::compileJsonSchema(writerStream, writerSchema); > > avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, > readerSchema, avro::jsonDecoder(writerSchema)); > struct Outer outer; > > std::stringstream > jsonStream("{\"extraArray\":[{\"first\":{\"field\":\"here is a string > field\"},\"second\":{\"field\":\"here is another string > field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]},{\"first\":{\"field\":\"second > item in array\"},\"second\":{\"field\":\"inner2 field of > 
2\"},\"innerArray\":[{\"number\":4},{\"number\":5}]},{\"first\":{\"field\":\"third > item in array\"},\"second\":{\"field\":\"inner2 field of > 3\"},\"innerArray\":[{\"number\":6}]}]}"); > std::auto_ptr<avro::InputStream> input = > avro::istreamInputStream(jsonStream); > decoder->init(*input); > avro::decode(*decoder, outer); > } > {code} > {code:title=Multiple nesting of unknown records} > { > std::string > readerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}}]}"); > std::string > writerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}}]}"); > std::stringstream readerStream(readerString); > std::stringstream writerStream(writerString); > > avro::ValidSchema readerSchema; > avro::ValidSchema writerSchema; > > avro::compileJsonSchema(readerStream, readerSchema); > avro::compileJsonSchema(writerStream, writerSchema); > > 
avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, > readerSchema, avro::jsonDecoder(writerSchema)); > struct Outer outer; > > std::stringstream > jsonStream("{\"outerAsField\":{\"extraArray\":[{\"first\":{\"field\":\"here > is a string field\"},\"second\":{\"field\":\"here is another string > field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]}]}}"); > std::auto_ptr<avro::InputStream> input = > avro::istreamInputStream(jsonStream); > decoder->init(*input); > avro::decode(*decoder, outer); > } > {code} > The following will generate a proper object according to the reader schema > and completely ignores the extraneous characters at the end of the stream. > {code:title=Garbage after appended field of new record} > { > std::string > readerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}"); > std::string > writerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}"); > std::stringstream readerStream(readerString); > std::stringstream writerStream(writerString); > > avro::ValidSchema readerSchema; > avro::ValidSchema writerSchema; > > avro::compileJsonSchema(readerStream, readerSchema); > avro::compileJsonSchema(writerStream, writerSchema); > > avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, > readerSchema, avro::jsonDecoder(writerSchema)); > struct Outer outer; > > 
std::stringstream jsonStream("{\"first\":{\"field\":\"here is a string > field\"},\"second\":{\"field\":\"here is another string > field\"},\"third\":{\"number\":3} GARBAGE_HERE}"); > std::auto_ptr<avro::InputStream> input = > avro::istreamInputStream(jsonStream); > decoder->init(*input); > avro::decode(*decoder, outer); > } > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)