[jira] [Commented] (AVRO-2184) Unable to decode JSON data file if a property is renamed in reader schema

ASF GitHub Bot (JIRA) Tue, 11 Dec 2018 10:11:31 -0800


    [ 
https://issues.apache.org/jira/browse/AVRO-2184?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16717676#comment-16717676
 ]


ASF GitHub Bot commented on AVRO-2184:
--------------------------------------

dkulp closed pull request #316: AVRO-2184: Unable to decode JSON data file if a 
property is renamed in reader schema
URL: https://github.com/apache/avro/pull/316
 
 
   

This is a PR merged from a forked repository.
Because GitHub hides the original diff of a pull request from a fork once it
is merged, the diff is reproduced below for the sake of provenance:

diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java 
b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java
index 1dcb8dd15..21cea4402 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java
@@ -469,7 +469,7 @@ public Symbol doAction(Symbol input, Symbol top) throws 
IOException {
         do {
           String fn = in.getText();
           in.nextToken();
-          if (name.equals(fn)) {
+          if (name.equals(fn) || fa.aliases.contains(fn)) {
             return null;
           } else {
             if (currentReorderBuffer == null) {
diff --git 
a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java
 
b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java
index 505c09423..44fc19b08 100644
--- 
a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java
+++ 
b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java
@@ -84,7 +84,7 @@ public Symbol generate(Schema sc, Map<LitS, Symbol> seen) {
         int n = 0;
         production[--i] = Symbol.RECORD_START;
         for (Field f : sc.getFields()) {
-          production[--i] = Symbol.fieldAdjustAction(n, f.name());
+          production[--i] = Symbol.fieldAdjustAction(n, f.name(), f.aliases());
           production[--i] = generate(f.schema(), seen);
           production[--i] = Symbol.FIELD_END;
           n++;
diff --git 
a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java 
b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java
index 187942400..df0ee4652 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java
@@ -541,16 +541,18 @@ public SkipAction flatten(Map<Sequence, Sequence> map,
 
   }
 
-  public static FieldAdjustAction fieldAdjustAction(int rindex, String fname) {
-    return new FieldAdjustAction(rindex, fname);
+  public static FieldAdjustAction fieldAdjustAction(int rindex, String fname, 
Set<String> aliases) {
+    return new FieldAdjustAction(rindex, fname, aliases);
   }
 
   public static class FieldAdjustAction extends ImplicitAction {
     public final int rindex;
     public final String fname;
-    @Deprecated public FieldAdjustAction(int rindex, String fname) {
+    public final Set<String> aliases;
+    @Deprecated public FieldAdjustAction(int rindex, String fname, Set<String> 
aliases) {
       this.rindex = rindex;
       this.fname = fname;
+      this.aliases = aliases;
     }
   }
 
diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java
 
b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java
index 85a3ca705..c4ea7e741 100644
--- 
a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java
+++ 
b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java
@@ -21,9 +21,12 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 
+import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collection;
 
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericData.EnumSymbol;
@@ -39,7 +42,11 @@
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
 
+@RunWith(Parameterized.class)
 public class TestReadingWritingDataInEvolvedSchemas {
 
   private static final String RECORD_A = "RecordA";
@@ -116,6 +123,23 @@
       
.name(FIELD_A).type().unionOf().floatType().and().doubleType().endUnion().noDefault()
 //
       .endRecord();
 
+  @Parameters(name = "encoder = {0}")
+  public static Collection<Object[]> data() {
+    return Arrays.asList(new EncoderType[][]{
+      {EncoderType.BINARY}, {EncoderType.JSON}
+    });
+  }
+
+  public TestReadingWritingDataInEvolvedSchemas(EncoderType encoderType) {
+    this.encoderType = encoderType;
+  }
+
+  private final EncoderType encoderType;
+
+  enum EncoderType {
+    BINARY, JSON
+  }
+
   @Test
   public void doubleWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws 
Exception {
     Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD;
@@ -357,33 +381,52 @@ public void readerWithDefaultValueIsApplied() throws 
Exception {
     assertEquals(314, decoded.get("newFieldWithDefault"));
   }
 
+  @Test
+  public void aliasesInSchema() throws Exception {
+    Schema writer = new Schema.Parser().parse(
+      "{\"namespace\": \"example.avro\", \"type\": \"record\", \"name\": 
\"User\", \"fields\": [" +
+        "{\"name\": \"name\", \"type\": \"int\"}\n" +
+        "]}\n");
+    Schema reader = new Schema.Parser().parse(
+      "{\"namespace\": \"example.avro\", \"type\": \"record\", \"name\": 
\"User\", \"fields\": [" +
+        "{\"name\": \"fname\", \"type\": \"int\", \"aliases\" : [ \"name\" 
]}\n" +
+        "]}\n");
+
+    GenericData.Record record = defaultRecordWithSchema(writer, "name", 1);
+    byte[] encoded = encodeGenericBlob(record);
+    GenericData.Record decoded = decodeGenericBlob(reader, reader, encoded);
+
+    assertEquals(1, decoded.get("fname"));
+  }
+
   private <T> Record defaultRecordWithSchema(Schema schema, String key, T 
value) {
     Record data = new GenericData.Record(schema);
     data.put(key, value);
     return data;
   }
 
-  private static byte[] encodeGenericBlob(GenericRecord data)
-      throws IOException {
+  private byte[] encodeGenericBlob(GenericRecord data) throws IOException {
     DatumWriter<GenericRecord> writer = new 
GenericDatumWriter<>(data.getSchema());
     ByteArrayOutputStream outStream = new ByteArrayOutputStream();
-    Encoder encoder = EncoderFactory.get().binaryEncoder(outStream, null);
+    Encoder encoder = encoderType == EncoderType.BINARY ?
+      EncoderFactory.get().binaryEncoder(outStream, null) :
+      EncoderFactory.get().jsonEncoder(data.getSchema(), outStream);
     writer.write(data, encoder);
     encoder.flush();
     outStream.close();
     return outStream.toByteArray();
   }
 
-  private static Record decodeGenericBlob(Schema expectedSchema, Schema 
schemaOfBlob, byte[] blob) throws IOException {
+  private Record decodeGenericBlob(Schema expectedSchema, Schema schemaOfBlob, 
byte[] blob) throws IOException {
     if (blob == null) {
       return null;
     }
     GenericDatumReader<Record> reader = new GenericDatumReader<>();
     reader.setExpected(expectedSchema);
     reader.setSchema(schemaOfBlob);
-    Decoder decoder = DecoderFactory.get().binaryDecoder(blob, null);
-    Record data = null;
-    data = reader.read(null, decoder);
-    return data;
+    Decoder decoder = encoderType == EncoderType.BINARY ?
+      DecoderFactory.get().binaryDecoder(blob, null) :
+      DecoderFactory.get().jsonDecoder(schemaOfBlob, new 
ByteArrayInputStream(blob));
+    return reader.read(null, decoder);
   }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Unable to decode JSON data file if a property is renamed in reader schema
> -------------------------------------------------------------------------
>
>                 Key: AVRO-2184
>                 URL: https://issues.apache.org/jira/browse/AVRO-2184
>             Project: Apache Avro
>          Issue Type: Bug
>            Reporter: Prateek Kohli
>            Assignee: Nandor Kollar
>            Priority: Major
>         Attachments: TestAliasesInSchemaEvolution.java
>
>
> I am unable to decode a JSON data file if a property is renamed in the 
> reader schema, even though, as per the documentation, this is a compatible 
> change.
> Also, datatype promotion is not supported: if I try to change the datatype 
> of the favorite_number field in the writer's schema, decoding fails.
> All of the above scenarios are supported if I use binary decoding instead of 
> JSON.
> *Writer Schema :*
> {"namespace": "example.avro",
>  "type": "record",
>  "name": "User",
>  "fields": [
>  {"name": "name", "type": "string"},
>  {"name": "favorite_number", "type": ["int", "null"]},
>  {"name": "favorite_color", "type": ["string", "null"]}
>  ]}
>  
> *Reader Schema :* 
> {"namespace": "example.avro",
>  "type": "record",
>  "name": "User",
>  "fields": [
>  {"name": "fname", "type": "string", "aliases" : [ "name" ]},
>  {"name": "favorite_number", "type": ["int", "null"]},
>  {"name": "favorite_color", "type": ["string", "null"]}
>  ]}
>  
> *I have written the below code to decode JSON data:*
> FileInputStream fin = new FileInputStream(file);
>  byte fileContent[] = new byte[(int)file.length()];
>  fin.read(fileContent);
>  InputStream input = new ByteArrayInputStream(fileContent);
>  DataInputStream din = new DataInputStream(input);
>  
>  while (true) {
>  try {
>          Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
>          ResolvingDecoder resolvingDecoder = 
> DecoderFactory.get().resolvingDecoder(writer,                             
> reader, decoder);
>          Object datum = datumReader.read(null, resolvingDecoder);
>          System.out.println(datum);
>      } catch (EOFException eofException) {
>           break;
>        }
>  }
> *Below is the Exception I get :*
> Exception in thread "main" org.apache.avro.AvroTypeException: Found 
> example.avro.User, expecting example.avro.User, missing required field fname
>  at org.apache.avro.io.ResolvingDecoder.doAction(ResolvingDecoder.java:292)
>  at org.apache.avro.io.parsing.Parser.advance(Parser.java:88)
>  at org.apache.avro.io.ResolvingDecoder.readString(ResolvingDecoder.java:196)
>  at org.apache.avro.io.ResolvingDecoder.readString(ResolvingDecoder.java:201)
>  at 
> org.apache.avro.generic.GenericDatumReader.readString(GenericDatumReader.java:422)
>  at 
> org.apache.avro.generic.GenericDatumReader.readString(GenericDatumReader.java:414)
>  at 
> org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:181)
>  at 
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
>  at 
> org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:232)
>  at 
> org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:222)
>  at 
> org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:175)
>  at 
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
>  at 
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:145)
>  at 
> com.ericsson.avroTest.avroCheck.WithoutCodeTest.main(WithoutCodeTest.java:134)
>  
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

[jira] [Commented] (AVRO-2184) Unable to decode JSON data file if a property is renamed in reader schema

Reply via email to