Repository: avro Updated Branches: refs/heads/master 9101a42ba -> 5e6ffb8d4
AVRO-1493: Java: Schema fingerprint vary by locale Project: http://git-wip-us.apache.org/repos/asf/avro/repo Commit: http://git-wip-us.apache.org/repos/asf/avro/commit/5e6ffb8d Tree: http://git-wip-us.apache.org/repos/asf/avro/tree/5e6ffb8d Diff: http://git-wip-us.apache.org/repos/asf/avro/diff/5e6ffb8d Branch: refs/heads/master Commit: 5e6ffb8d444c0ed3fb6d0180718a9a7c131f2ce6 Parents: 9101a42 Author: Kevin Schultz <[email protected]> Authored: Wed Dec 9 16:01:59 2015 -0500 Committer: Ryan Blue <[email protected]> Committed: Sun Feb 21 16:43:30 2016 -0800 ---------------------------------------------------------------------- CHANGES.txt | 4 +++- .../src/main/java/org/apache/avro/Schema.java | 9 +++++---- .../apache/avro/TestSchemaNormalization.java | 21 ++++++++++++++++++++ 3 files changed, 29 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/avro/blob/5e6ffb8d/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index b23636e..cadc982 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -17,6 +17,8 @@ Trunk (not yet released) BUG FIXES + AVRO-1493. Java: Avoid the "Turkish Locale Problem". Schema fingerprints are + now consistent regardless of the environment's locale. Avro 1.8.0 (22 January 2016) @@ -51,7 +53,7 @@ Avro 1.8.0 (22 January 2016) (Ryan Blue via cutting) AVRO-570. Python: Add connector for tethered mapreduce. - (Jeremy Lewi and Steven Willis via cutting) + (Jeremy Lewi and Steven Willis via cutting) AVRO-834. Java: Data File corruption recovery tool. (scottcarey and tomwhite) http://git-wip-us.apache.org/repos/asf/avro/blob/5e6ffb8d/lang/java/avro/src/main/java/org/apache/avro/Schema.java ---------------------------------------------------------------------- diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java b/lang/java/avro/src/main/java/org/apache/avro/Schema.java index 9a201ce..600f7aa 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java @@ -32,6 +32,7 @@ import java.util.IdentityHashMap; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; @@ -93,7 +94,7 @@ public abstract class Schema extends JsonProperties { RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL; private String name; - private Type() { this.name = this.name().toLowerCase(); } + private Type() { this.name = this.name().toLowerCase(Locale.ENGLISH); } public String getName() { return name; } }; @@ -377,7 +378,7 @@ public abstract class Schema extends JsonProperties { public enum Order { ASCENDING, DESCENDING, IGNORE; private String name; - private Order() { this.name = this.name().toLowerCase(); } + private Order() { this.name = this.name().toLowerCase(Locale.ENGLISH); } }; private final String name; // name of the field. @@ -749,7 +750,7 @@ public abstract class Schema extends JsonProperties { throw new SchemaParseException("Duplicate enum symbol: "+symbol); } public List<String> getEnumSymbols() { return symbols; } - public boolean hasEnumSymbol(String symbol) { + public boolean hasEnumSymbol(String symbol) { return ordinals.containsKey(symbol); } public int getEnumOrdinal(String symbol) { return ordinals.get(symbol); } public boolean equals(Object o) { @@ -1269,7 +1270,7 @@ public abstract class Schema extends JsonProperties { Field.Order order = Field.Order.ASCENDING; JsonNode orderNode = field.get("order"); if (orderNode != null) - order = Field.Order.valueOf(orderNode.getTextValue().toUpperCase()); + order = Field.Order.valueOf(orderNode.getTextValue().toUpperCase(Locale.ENGLISH)); JsonNode defaultValue = field.get("default"); if (defaultValue != null && (Type.FLOAT.equals(fieldSchema.getType()) http://git-wip-us.apache.org/repos/asf/avro/blob/5e6ffb8d/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java ---------------------------------------------------------------------- diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java index 405d74d..f8c0413 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.List; import java.util.ArrayList; import java.util.Formatter; +import java.util.Locale; import org.junit.Test; import org.junit.runner.RunWith; @@ -68,6 +69,26 @@ public class TestSchemaNormalization { } } + // see AVRO-1493 + @RunWith(Parameterized.class) + public static class TestFingerprintInternationalization { + String input, expectedOutput; + public TestFingerprintInternationalization(String i, String o) { input=i; expectedOutput=o; } + + @Parameters public static List<Object[]> cases() throws IOException + { return CaseFinder.find(data(),"fingerprint",new ArrayList<Object[]>()); } + + @Test public void testCanonicalization() throws Exception { + Locale originalDefaultLocale = Locale.getDefault(); + Locale.setDefault(Locale.forLanguageTag("tr")); + Schema s = Schema.parse(input); + long carefulFP = altFingerprint(SchemaNormalization.toParsingForm(s)); + assertEquals(carefulFP, Long.parseLong(expectedOutput)); + assertEqHex(carefulFP, SchemaNormalization.parsingFingerprint64(s)); + Locale.setDefault(originalDefaultLocale); + } + } + private static String DATA_FILE = (System.getProperty("share.dir", "../../../share") + "/test/data/schema-tests.txt");
