[jira] [Commented] (FLINK-13807) Flink-avro unit tests fails if the character encoding in the environment is not default to UTF-8
[ https://issues.apache.org/jira/browse/FLINK-13807?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16913072#comment-16913072 ] Chesnay Schepler commented on FLINK-13807: -- [~Tison] That's exactly what we should do. > Flink-avro unit tests fails if the character encoding in the environment is > not default to UTF-8 > > > Key: FLINK-13807 > URL: https://issues.apache.org/jira/browse/FLINK-13807 > Project: Flink > Issue Type: Bug >Affects Versions: 1.8.0 >Reporter: Ethan Li >Priority: Minor > Attachments: patch.diff > > > On Flink release-1.8 branch: > {code:java} > [ERROR] Tests run: 12, Failures: 4, Errors: 0, Skipped: 0, Time elapsed: 4.81 > s <<< FAILURE! - in > org.apache.flink.formats.avro.typeutils.AvroTypeExtractionTest > [ERROR] testSimpleAvroRead[Execution mode = > CLUSTER](org.apache.flink.formats.avro.typeutils.AvroTypeExtractionTest) > Time elapsed: 0.438 s <<< FAILURE! > java.lang.AssertionError: > Different elements in arrays: expected 2 elements and received 2 > files: [/tmp/junit5386344396421857812/junit6023978980792200274.tmp/4, > /tmp/junit5386344396421857812/junit6023978980792200274.tmp/2, > /tmp/junit5386344396421857812/junit6023978980792200274.tmp/1, > /tmp/junit5386344396421857812/junit6023978980792200274.tmp/3] > expected: [{"name": "Alyssa", "favorite_number": 256, "favorite_color": > null, "type_long_test": null, "type_double_test": 123.45, "type_null_test": > null, "type_bool_test": true, "type_array_string": ["ELEMENT 1", "ELEMENT > 2"], "type_array_boolean": [true, false], "type_nullable_array": null, > "type_enum": "GREEN", "type_map": {"KEY 2": 17554, "KEY 1": 8546456}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, 
"type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes": "\u0007?"}, "type_decimal_fixed": [7, > -48]}, {"name": "Charlie", "favorite_number": null, "favorite_color": "blue", > "type_long_test": 1337, "type_double_test": 1.337, "type_null_test": null, > "type_bool_test": false, "type_array_string": [], "type_array_boolean": [], > "type_nullable_array": null, "type_enum": "RED", "type_map": {}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, "type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes": "\u0007?"}, "type_decimal_fixed": [7, > -48]}] > received: [{"name": "Alyssa", "favorite_number": 256, "favorite_color": > null, "type_long_test": null, "type_double_test": 123.45, "type_null_test": > null, "type_bool_test": true, "type_array_string": ["ELEMENT 1", "ELEMENT > 2"], "type_array_boolean": [true, false], "type_nullable_array": null, > "type_enum": "GREEN", "type_map": {"KEY 2": 17554, "KEY 1": 8546456}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, "type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes": "\u0007??"}, "type_decimal_fixed": > [7, -48]}, {"name": "Charlie", "favorite_number": null, "favorite_color": > "blue", "type_long_test": 1337, "type_double_test": 1.337, "type_null_test": > null, "type_bool_test": false, "type_array_string": [], "type_array_boolean": > [], "type_nullable_array": null, "type_enum": "RED", "type_map": {}, 
> "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, "type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes": "\u0007??"}, "type_decimal_fixed": > [7, -48]}] > at >
[jira] [Commented] (FLINK-13807) Flink-avro unit tests fails if the character encoding in the environment is not default to UTF-8
[ https://issues.apache.org/jira/browse/FLINK-13807?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16912431#comment-16912431 ] Ethan Li commented on FLINK-13807: -- Thanks for fixing it. Sorry, I won't be able to test it any time soon. If you change your environment's default character encoding to ANSI_X3.4-1968 and the unit tests pass, it should be good enough. > Flink-avro unit tests fails if the character encoding in the environment is > not default to UTF-8 > > > Key: FLINK-13807 > URL: https://issues.apache.org/jira/browse/FLINK-13807 > Project: Flink > Issue Type: Bug >Affects Versions: 1.8.0 >Reporter: Ethan Li >Priority: Minor > Attachments: patch.diff > > > On Flink release-1.8 branch: > {code:java} > [ERROR] Tests run: 12, Failures: 4, Errors: 0, Skipped: 0, Time elapsed: 4.81 > s <<< FAILURE! - in > org.apache.flink.formats.avro.typeutils.AvroTypeExtractionTest > [ERROR] testSimpleAvroRead[Execution mode = > CLUSTER](org.apache.flink.formats.avro.typeutils.AvroTypeExtractionTest) > Time elapsed: 0.438 s <<< FAILURE! 
> java.lang.AssertionError: > Different elements in arrays: expected 2 elements and received 2 > files: [/tmp/junit5386344396421857812/junit6023978980792200274.tmp/4, > /tmp/junit5386344396421857812/junit6023978980792200274.tmp/2, > /tmp/junit5386344396421857812/junit6023978980792200274.tmp/1, > /tmp/junit5386344396421857812/junit6023978980792200274.tmp/3] > expected: [{"name": "Alyssa", "favorite_number": 256, "favorite_color": > null, "type_long_test": null, "type_double_test": 123.45, "type_null_test": > null, "type_bool_test": true, "type_array_string": ["ELEMENT 1", "ELEMENT > 2"], "type_array_boolean": [true, false], "type_nullable_array": null, > "type_enum": "GREEN", "type_map": {"KEY 2": 17554, "KEY 1": 8546456}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, "type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes": "\u0007?"}, "type_decimal_fixed": [7, > -48]}, {"name": "Charlie", "favorite_number": null, "favorite_color": "blue", > "type_long_test": 1337, "type_double_test": 1.337, "type_null_test": null, > "type_bool_test": false, "type_array_string": [], "type_array_boolean": [], > "type_nullable_array": null, "type_enum": "RED", "type_map": {}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, "type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes": "\u0007?"}, "type_decimal_fixed": [7, > -48]}] > received: [{"name": "Alyssa", 
"favorite_number": 256, "favorite_color": > null, "type_long_test": null, "type_double_test": 123.45, "type_null_test": > null, "type_bool_test": true, "type_array_string": ["ELEMENT 1", "ELEMENT > 2"], "type_array_boolean": [true, false], "type_nullable_array": null, > "type_enum": "GREEN", "type_map": {"KEY 2": 17554, "KEY 1": 8546456}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, "type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes": "\u0007??"}, "type_decimal_fixed": > [7, -48]}, {"name": "Charlie", "favorite_number": null, "favorite_color": > "blue", "type_long_test": 1337, "type_double_test": 1.337, "type_null_test": > null, "type_bool_test": false, "type_array_string": [], "type_array_boolean": > [], "type_nullable_array": null, "type_enum": "RED", "type_map": {}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, "type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes":
[jira] [Commented] (FLINK-13807) Flink-avro unit tests fails if the character encoding in the environment is not default to UTF-8
[ https://issues.apache.org/jira/browse/FLINK-13807?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16911904#comment-16911904 ] TisonKun commented on FLINK-13807: -- Thanks for reporting this issue, [~ethanli]! [~Zentol] I think we can address this issue by always reading the file with the UTF-8 charset. [~ethanli] could you try out the patch I have just attached? I tested it in my local environment and saw no conflicts. I volunteer to follow up on this issue. > Flink-avro unit tests fails if the character encoding in the environment is > not default to UTF-8 > > > Key: FLINK-13807 > URL: https://issues.apache.org/jira/browse/FLINK-13807 > Project: Flink > Issue Type: Bug >Affects Versions: 1.8.0 >Reporter: Ethan Li >Priority: Minor > Attachments: patch.diff > > > On Flink release-1.8 branch: > {code:java} > [ERROR] Tests run: 12, Failures: 4, Errors: 0, Skipped: 0, Time elapsed: 4.81 > s <<< FAILURE! - in > org.apache.flink.formats.avro.typeutils.AvroTypeExtractionTest > [ERROR] testSimpleAvroRead[Execution mode = > CLUSTER](org.apache.flink.formats.avro.typeutils.AvroTypeExtractionTest) > Time elapsed: 0.438 s <<< FAILURE! 
> java.lang.AssertionError: > Different elements in arrays: expected 2 elements and received 2 > files: [/tmp/junit5386344396421857812/junit6023978980792200274.tmp/4, > /tmp/junit5386344396421857812/junit6023978980792200274.tmp/2, > /tmp/junit5386344396421857812/junit6023978980792200274.tmp/1, > /tmp/junit5386344396421857812/junit6023978980792200274.tmp/3] > expected: [{"name": "Alyssa", "favorite_number": 256, "favorite_color": > null, "type_long_test": null, "type_double_test": 123.45, "type_null_test": > null, "type_bool_test": true, "type_array_string": ["ELEMENT 1", "ELEMENT > 2"], "type_array_boolean": [true, false], "type_nullable_array": null, > "type_enum": "GREEN", "type_map": {"KEY 2": 17554, "KEY 1": 8546456}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, "type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes": "\u0007?"}, "type_decimal_fixed": [7, > -48]}, {"name": "Charlie", "favorite_number": null, "favorite_color": "blue", > "type_long_test": 1337, "type_double_test": 1.337, "type_null_test": null, > "type_bool_test": false, "type_array_string": [], "type_array_boolean": [], > "type_nullable_array": null, "type_enum": "RED", "type_map": {}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, "type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes": "\u0007?"}, "type_decimal_fixed": [7, > -48]}] > received: [{"name": "Alyssa", 
"favorite_number": 256, "favorite_color": > null, "type_long_test": null, "type_double_test": 123.45, "type_null_test": > null, "type_bool_test": true, "type_array_string": ["ELEMENT 1", "ELEMENT > 2"], "type_array_boolean": [true, false], "type_nullable_array": null, > "type_enum": "GREEN", "type_map": {"KEY 2": 17554, "KEY 1": 8546456}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, > "type_timestamp_millis": 2014-03-01T12:12:12.321Z, "type_timestamp_micros": > 123456, "type_decimal_bytes": {"bytes": "\u0007??"}, "type_decimal_fixed": > [7, -48]}, {"name": "Charlie", "favorite_number": null, "favorite_color": > "blue", "type_long_test": 1337, "type_double_test": 1.337, "type_null_test": > null, "type_bool_test": false, "type_array_string": [], "type_array_boolean": > [], "type_nullable_array": null, "type_enum": "RED", "type_map": {}, > "type_fixed": null, "type_union": null, "type_nested": {"num": 239, "street": > "Baker Street", "city": "London", "state": "London", "zip": "NW1 6XE"}, > "type_bytes": {"bytes": > "\u\u\u\u\u\u\u\u\u\u"}, "type_date": > 2014-03-01, "type_time_millis": 12:12:12.000, "type_time_micros": 123456, >