Repository: avro Updated Branches: refs/heads/master db8ed216e -> 50eebb8a0
AVRO-2072: ResolvingGrammarGenerator doesn't implement schema resolution correctly for unions Signed-off-by: Anders Sundelin <[email protected]> Signed-off-by: Gabor Szadovszky <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/avro/repo Commit: http://git-wip-us.apache.org/repos/asf/avro/commit/50eebb8a Tree: http://git-wip-us.apache.org/repos/asf/avro/tree/50eebb8a Diff: http://git-wip-us.apache.org/repos/asf/avro/diff/50eebb8a Branch: refs/heads/master Commit: 50eebb8a0b96565641bb56a0f11e326b3f5cc50b Parents: db8ed21 Author: Nandor Kollar <[email protected]> Authored: Mon Sep 11 14:40:16 2017 +0200 Committer: Gabor Szadovszky <[email protected]> Committed: Mon Sep 11 14:42:21 2017 +0200 ---------------------------------------------------------------------- CHANGES.txt | 3 + .../io/parsing/ResolvingGrammarGenerator.java | 16 ++- .../TestReadingWritingDataInEvolvedSchemas.java | 109 ++++++++++++++++++- .../apache/avro/TestSchemaCompatibility.java | 4 + .../org/apache/avro/TestSchemaValidation.java | 5 +- 5 files changed, 128 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/avro/blob/50eebb8a/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index e78adc4..c2dba84 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -13,6 +13,9 @@ Trunk (not yet released) AVRO-1933: Add more specific error details to SchemaCompatibility class (Anders Sundelin via gabor) + AVRO-2072: ResolvingGrammarGenerator doesn't implement schema resolution correctly for unions + (Nandor Kollar via gabor) + NEW FEATURES AVRO-1704: Java: Add support for single-message encoding. (blue) http://git-wip-us.apache.org/repos/asf/avro/blob/50eebb8a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java ---------------------------------------------------------------------- diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java index cc028cf..5407327 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java @@ -166,7 +166,7 @@ public class ResolvingGrammarGenerator extends ValidatingGrammarGenerator { break; case UNION: - int j = bestBranch(reader, writer, seen); + int j = firstMatchingBranch(reader, writer, seen); if (j >= 0) { Symbol s = generate(writer, reader.getTypes().get(j), seen); return Symbol.seq(Symbol.unionAdjustAction(j, s), Symbol.UNION); @@ -451,7 +451,7 @@ public class ResolvingGrammarGenerator extends ValidatingGrammarGenerator { return false; } - private int bestBranch(Schema r, Schema w, Map<LitS, Symbol> seen) throws IOException { + private int firstMatchingBranch(Schema r, Schema w, Map<LitS, Symbol> seen) throws IOException { Schema.Type vt = w.getType(); // first scan for exact match int j = 0; @@ -491,11 +491,19 @@ public class ResolvingGrammarGenerator extends ValidatingGrammarGenerator { switch (vt) { case INT: switch (b.getType()) { - case LONG: case DOUBLE: - return j; + case LONG: + case DOUBLE: + case FLOAT: + return j; } break; case LONG: + switch (b.getType()) { + case DOUBLE: + case FLOAT: + return j; + } + break; case FLOAT: switch (b.getType()) { case DOUBLE: http://git-wip-us.apache.org/repos/asf/avro/blob/50eebb8a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java ---------------------------------------------------------------------- diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java index 0012876..757aa81 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java @@ -91,6 +91,30 @@ public class TestReadingWritingDataInEvolvedSchemas { .fields() // .name(FIELD_A).type().enumeration("Enum1").symbols("A", "B", "C").noDefault() // .endRecord(); + private static final Schema UNION_INT_RECORD = SchemaBuilder.record(RECORD_A) // + .fields() // + .name(FIELD_A).type().unionOf().intType().endUnion().noDefault() // + .endRecord(); + private static final Schema UNION_LONG_RECORD = SchemaBuilder.record(RECORD_A) // + .fields() // + .name(FIELD_A).type().unionOf().longType().endUnion().noDefault() // + .endRecord(); + private static final Schema UNION_FLOAT_RECORD = SchemaBuilder.record(RECORD_A) // + .fields() // + .name(FIELD_A).type().unionOf().floatType().endUnion().noDefault() // + .endRecord(); + private static final Schema UNION_DOUBLE_RECORD = SchemaBuilder.record(RECORD_A) // + .fields() // + .name(FIELD_A).type().unionOf().doubleType().endUnion().noDefault() // + .endRecord(); + private static final Schema UNION_LONG_FLOAT_RECORD = SchemaBuilder.record(RECORD_A) // + .fields() // + .name(FIELD_A).type().unionOf().floatType().and().longType().endUnion().noDefault() // + .endRecord(); + private static final Schema UNION_FLOAT_DOUBLE_RECORD = SchemaBuilder.record(RECORD_A) // + .fields() // + .name(FIELD_A).type().unionOf().floatType().and().doubleType().endUnion().noDefault() // + .endRecord(); @Test public void doubleWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { @@ -102,6 +126,87 @@ public class TestReadingWritingDataInEvolvedSchemas { } @Test + public void longWrittenWithUnionSchemaIsConvertedToUnionLongFloatSchema() throws Exception { + Schema writer = UNION_LONG_RECORD; + Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); + byte[] encoded = encodeGenericBlob(record); + Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded); + assertEquals(42L, decoded.get(FIELD_A)); + } + + @Test + public void longWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { + Schema writer = UNION_LONG_RECORD; + Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); + byte[] encoded = encodeGenericBlob(record); + Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded); + assertEquals(42.0, decoded.get(FIELD_A)); + } + + @Test + public void intWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { + Schema writer = UNION_INT_RECORD; + Record record = defaultRecordWithSchema(writer, FIELD_A, 42); + byte[] encoded = encodeGenericBlob(record); + Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded); + assertEquals(42.0, decoded.get(FIELD_A)); + } + + @Test + public void intWrittenWithUnionSchemaIsReadableByFloatSchema() throws Exception { + Schema writer = UNION_INT_RECORD; + Record record = defaultRecordWithSchema(writer, FIELD_A, 42); + byte[] encoded = encodeGenericBlob(record); + Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded); + assertEquals(42.0f, decoded.get(FIELD_A)); + } + + @Test + public void intWrittenWithUnionSchemaIsReadableByFloatUnionSchema() throws Exception { + Schema writer = UNION_INT_RECORD; + Record record = defaultRecordWithSchema(writer, FIELD_A, 42); + byte[] encoded = encodeGenericBlob(record); + Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded); + assertEquals(42.0f, decoded.get(FIELD_A)); + } + + @Test + public void longWrittenWithUnionSchemaIsReadableByFloatSchema() throws Exception { + Schema writer = UNION_LONG_RECORD; + Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); + byte[] encoded = encodeGenericBlob(record); + Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded); + assertEquals(42.0f, decoded.get(FIELD_A)); + } + + @Test + public void longWrittenWithUnionSchemaIsReadableByFloatUnionSchema() throws Exception { + Schema writer = UNION_LONG_RECORD; + Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); + byte[] encoded = encodeGenericBlob(record); + Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded); + assertEquals(42.0f, decoded.get(FIELD_A)); + } + + @Test + public void longWrittenWithUnionSchemaIsConvertedToLongFloatUnionSchema() throws Exception { + Schema writer = UNION_LONG_RECORD; + Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); + byte[] encoded = encodeGenericBlob(record); + Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded); + assertEquals(42L, decoded.get(FIELD_A)); + } + + @Test + public void longWrittenWithUnionSchemaIsConvertedToFloatDoubleUnionSchema() throws Exception { + Schema writer = UNION_LONG_RECORD; + Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); + byte[] encoded = encodeGenericBlob(record); + Record decoded = decodeGenericBlob(UNION_FLOAT_DOUBLE_RECORD, writer, encoded); + assertEquals(42.0F, decoded.get(FIELD_A)); + } + + @Test public void doubleWrittenWithUnionSchemaIsNotConvertedToFloatSchema() throws Exception { expectedException.expect(AvroTypeException.class); expectedException.expectMessage("Found double, expecting float"); @@ -260,7 +365,7 @@ public class TestReadingWritingDataInEvolvedSchemas { private static byte[] encodeGenericBlob(GenericRecord data) throws IOException { - DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(data.getSchema()); + DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(data.getSchema()); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); Encoder encoder = EncoderFactory.get().binaryEncoder(outStream, null); writer.write(data, encoder); @@ -273,7 +378,7 @@ public class TestReadingWritingDataInEvolvedSchemas { if (blob == null) { return null; } - GenericDatumReader<Record> reader = new GenericDatumReader<Record>(); + GenericDatumReader<Record> reader = new GenericDatumReader<>(); reader.setExpected(expectedSchema); reader.setSchema(schemaOfBlob); Decoder decoder = DecoderFactory.get().binaryDecoder(blob, null); http://git-wip-us.apache.org/repos/asf/avro/blob/50eebb8a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java ---------------------------------------------------------------------- diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java index 324c439..6b38cbd 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java @@ -292,6 +292,10 @@ public class TestSchemaCompatibility { // Tests involving unions: new ReaderWriter(EMPTY_UNION_SCHEMA, EMPTY_UNION_SCHEMA), + new ReaderWriter(FLOAT_UNION_SCHEMA, EMPTY_UNION_SCHEMA), + new ReaderWriter(FLOAT_UNION_SCHEMA, INT_UNION_SCHEMA), + new ReaderWriter(FLOAT_UNION_SCHEMA, LONG_UNION_SCHEMA), + new ReaderWriter(FLOAT_UNION_SCHEMA, INT_LONG_UNION_SCHEMA), new ReaderWriter(INT_UNION_SCHEMA, INT_UNION_SCHEMA), new ReaderWriter(INT_STRING_UNION_SCHEMA, STRING_INT_UNION_SCHEMA), new ReaderWriter(INT_UNION_SCHEMA, EMPTY_UNION_SCHEMA), http://git-wip-us.apache.org/repos/asf/avro/blob/50eebb8a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java ---------------------------------------------------------------------- diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java index f0b8230..4654979 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java @@ -112,9 +112,8 @@ public class TestSchemaValidation { new ReaderWriter(INT_STRING_UNION_SCHEMA, STRING_INT_UNION_SCHEMA), new ReaderWriter(INT_UNION_SCHEMA, EMPTY_UNION_SCHEMA), new ReaderWriter(LONG_UNION_SCHEMA, INT_UNION_SCHEMA), - // float unions cannot read int or long unions - // new ReaderWriter(FLOAT_UNION_SCHEMA, INT_UNION_SCHEMA), - // new ReaderWriter(FLOAT_UNION_SCHEMA, LONG_UNION_SCHEMA), + new ReaderWriter(FLOAT_UNION_SCHEMA, INT_UNION_SCHEMA), + new ReaderWriter(FLOAT_UNION_SCHEMA, LONG_UNION_SCHEMA), new ReaderWriter(DOUBLE_UNION_SCHEMA, INT_UNION_SCHEMA), new ReaderWriter(LONG_UNION_SCHEMA, EMPTY_UNION_SCHEMA), new ReaderWriter(DOUBLE_UNION_SCHEMA, LONG_UNION_SCHEMA),
