[ https://issues.apache.org/jira/browse/AVRO-2184?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16717676#comment-16717676 ]
ASF GitHub Bot commented on AVRO-2184: -------------------------------------- dkulp closed pull request #316: AVRO-2184: Unable to decode JSON data file if a property is renamed in reader schema URL: https://github.com/apache/avro/pull/316 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java index 1dcb8dd15..21cea4402 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java @@ -469,7 +469,7 @@ public Symbol doAction(Symbol input, Symbol top) throws IOException { do { String fn = in.getText(); in.nextToken(); - if (name.equals(fn)) { + if (name.equals(fn) || fa.aliases.contains(fn)) { return null; } else { if (currentReorderBuffer == null) { diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java index 505c09423..44fc19b08 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java @@ -84,7 +84,7 @@ public Symbol generate(Schema sc, Map<LitS, Symbol> seen) { int n = 0; production[--i] = Symbol.RECORD_START; for (Field f : sc.getFields()) { - production[--i] = Symbol.fieldAdjustAction(n, f.name()); + production[--i] = Symbol.fieldAdjustAction(n, f.name(), f.aliases()); production[--i] = generate(f.schema(), seen); production[--i] = Symbol.FIELD_END; n++; diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java 
b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java index 187942400..df0ee4652 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java @@ -541,16 +541,18 @@ public SkipAction flatten(Map<Sequence, Sequence> map, } - public static FieldAdjustAction fieldAdjustAction(int rindex, String fname) { - return new FieldAdjustAction(rindex, fname); + public static FieldAdjustAction fieldAdjustAction(int rindex, String fname, Set<String> aliases) { + return new FieldAdjustAction(rindex, fname, aliases); } public static class FieldAdjustAction extends ImplicitAction { public final int rindex; public final String fname; - @Deprecated public FieldAdjustAction(int rindex, String fname) { + public final Set<String> aliases; + @Deprecated public FieldAdjustAction(int rindex, String fname, Set<String> aliases) { this.rindex = rindex; this.fname = fname; + this.aliases = aliases; } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java index 85a3ca705..c4ea7e741 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java @@ -21,9 +21,12 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collection; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.EnumSymbol; @@ -39,7 +42,11 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import 
org.junit.runners.Parameterized.Parameters; +@RunWith(Parameterized.class) public class TestReadingWritingDataInEvolvedSchemas { private static final String RECORD_A = "RecordA"; @@ -116,6 +123,23 @@ .name(FIELD_A).type().unionOf().floatType().and().doubleType().endUnion().noDefault() // .endRecord(); + @Parameters(name = "encoder = {0}") + public static Collection<Object[]> data() { + return Arrays.asList(new EncoderType[][]{ + {EncoderType.BINARY}, {EncoderType.JSON} + }); + } + + public TestReadingWritingDataInEvolvedSchemas(EncoderType encoderType) { + this.encoderType = encoderType; + } + + private final EncoderType encoderType; + + enum EncoderType { + BINARY, JSON + } + @Test public void doubleWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; @@ -357,33 +381,52 @@ public void readerWithDefaultValueIsApplied() throws Exception { assertEquals(314, decoded.get("newFieldWithDefault")); } + @Test + public void aliasesInSchema() throws Exception { + Schema writer = new Schema.Parser().parse( + "{\"namespace\": \"example.avro\", \"type\": \"record\", \"name\": \"User\", \"fields\": [" + + "{\"name\": \"name\", \"type\": \"int\"}\n" + + "]}\n"); + Schema reader = new Schema.Parser().parse( + "{\"namespace\": \"example.avro\", \"type\": \"record\", \"name\": \"User\", \"fields\": [" + + "{\"name\": \"fname\", \"type\": \"int\", \"aliases\" : [ \"name\" ]}\n" + + "]}\n"); + + GenericData.Record record = defaultRecordWithSchema(writer, "name", 1); + byte[] encoded = encodeGenericBlob(record); + GenericData.Record decoded = decodeGenericBlob(reader, reader, encoded); + + assertEquals(1, decoded.get("fname")); + } + private <T> Record defaultRecordWithSchema(Schema schema, String key, T value) { Record data = new GenericData.Record(schema); data.put(key, value); return data; } - private static byte[] encodeGenericBlob(GenericRecord data) - throws IOException { + private byte[] 
encodeGenericBlob(GenericRecord data) throws IOException { DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(data.getSchema()); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - Encoder encoder = EncoderFactory.get().binaryEncoder(outStream, null); + Encoder encoder = encoderType == EncoderType.BINARY ? + EncoderFactory.get().binaryEncoder(outStream, null) : + EncoderFactory.get().jsonEncoder(data.getSchema(), outStream); writer.write(data, encoder); encoder.flush(); outStream.close(); return outStream.toByteArray(); } - private static Record decodeGenericBlob(Schema expectedSchema, Schema schemaOfBlob, byte[] blob) throws IOException { + private Record decodeGenericBlob(Schema expectedSchema, Schema schemaOfBlob, byte[] blob) throws IOException { if (blob == null) { return null; } GenericDatumReader<Record> reader = new GenericDatumReader<>(); reader.setExpected(expectedSchema); reader.setSchema(schemaOfBlob); - Decoder decoder = DecoderFactory.get().binaryDecoder(blob, null); - Record data = null; - data = reader.read(null, decoder); - return data; + Decoder decoder = encoderType == EncoderType.BINARY ? + DecoderFactory.get().binaryDecoder(blob, null) : + DecoderFactory.get().jsonDecoder(schemaOfBlob, new ByteArrayInputStream(blob)); + return reader.read(null, decoder); } } ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org

> Unable to decode JSON data file if a property is renamed in reader schema
> -------------------------------------------------------------------------
>
> Key: AVRO-2184
> URL: https://issues.apache.org/jira/browse/AVRO-2184
> Project: Apache Avro
> Issue Type: Bug
> Reporter: Prateek Kohli
> Assignee: Nandor Kollar
> Priority: Major
> Attachments: TestAliasesInSchemaEvolution.java
>
>
> I am unable to decode a JSON data file if a property is renamed in the reader schema.
> As per the documentation, it is a compatible change.
> Also, datatype promotion is not supported: if I change the datatype of the
> favorite_number field in the writer's schema, decoding fails.
> All of the above scenarios are supported if I use binary decoding instead of JSON.
>
> *Writer Schema :*
> {"namespace": "example.avro",
>  "type": "record",
>  "name": "User",
>  "fields": [
>    {"name": "name", "type": "string"},
>    {"name": "favorite_number", "type": ["int", "null"]},
>    {"name": "favorite_color", "type": ["string", "null"]}
>  ]}
>
> *Reader Schema :*
> {"namespace": "example.avro",
>  "type": "record",
>  "name": "User",
>  "fields": [
>    {"name": "fname", "type": "string", "aliases" : [ "name" ]},
>    {"name": "favorite_number", "type": ["int", "null"]},
>    {"name": "favorite_color", "type": ["string", "null"]}
>  ]}
>
> *I have written the below code to decode JSON data:*
> FileInputStream fin = new FileInputStream(file);
> byte fileContent[] = new byte[(int)file.length()];
> fin.read(fileContent);
> InputStream input = new ByteArrayInputStream(fileContent);
> DataInputStream din = new DataInputStream(input);
>
> while (true) {
>   try {
>     Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
>     ResolvingDecoder resolvingDecoder = DecoderFactory.get().resolvingDecoder(writer, reader, decoder);
>     Object datum = datumReader.read(null, resolvingDecoder);
>     System.out.println(datum);
>   } catch (EOFException eofException) {
>     break;
>   }
> }
>
> *Below is the Exception I get :*
> Exception in thread "main" org.apache.avro.AvroTypeException: Found example.avro.User, expecting example.avro.User, missing required field fname
> at org.apache.avro.io.ResolvingDecoder.doAction(ResolvingDecoder.java:292)
> at org.apache.avro.io.parsing.Parser.advance(Parser.java:88)
> at org.apache.avro.io.ResolvingDecoder.readString(ResolvingDecoder.java:196)
> at org.apache.avro.io.ResolvingDecoder.readString(ResolvingDecoder.java:201)
> at org.apache.avro.generic.GenericDatumReader.readString(GenericDatumReader.java:422)
> at org.apache.avro.generic.GenericDatumReader.readString(GenericDatumReader.java:414)
> at org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:181)
> at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
> at org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:232)
> at org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:222)
> at org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:175)
> at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
> at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:145)
> at com.ericsson.avroTest.avroCheck.WithoutCodeTest.main(WithoutCodeTest.java:134)
>

--
This message was sent by Atlassian JIRA (v7.6.3#76005)