COMMONSRDF-51: compare language tags in lower case
Project: http://git-wip-us.apache.org/repos/asf/commons-rdf/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-rdf/commit/3064d219 Tree: http://git-wip-us.apache.org/repos/asf/commons-rdf/tree/3064d219 Diff: http://git-wip-us.apache.org/repos/asf/commons-rdf/diff/3064d219 Branch: refs/heads/COMMONSRDF-47 Commit: 3064d219606cbe42c0150d81dbf6cdbc74bf7491 Parents: 0e1969a Author: Stian Soiland-Reyes <[email protected]> Authored: Thu Jan 12 14:51:26 2017 +0000 Committer: Stian Soiland-Reyes <[email protected]> Committed: Thu Jan 12 14:51:26 2017 +0000 ---------------------------------------------------------------------- .../org/apache/commons/rdf/api/Literal.java | 27 +++-- .../apache/commons/rdf/api/AbstractRDFTest.java | 118 ++++++++++++++++++- .../commons/rdf/jena/impl/JenaLiteralImpl.java | 13 +- .../commons/rdf/jsonldjava/JsonLdLiteral.java | 13 +- .../commons/rdf/rdf4j/impl/LiteralImpl.java | 15 ++- .../apache/commons/rdf/simple/LiteralImpl.java | 8 +- 6 files changed, 169 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/api/src/main/java/org/apache/commons/rdf/api/Literal.java ---------------------------------------------------------------------- diff --git a/api/src/main/java/org/apache/commons/rdf/api/Literal.java b/api/src/main/java/org/apache/commons/rdf/api/Literal.java index a434a73..ea6e3a4 100644 --- a/api/src/main/java/org/apache/commons/rdf/api/Literal.java +++ b/api/src/main/java/org/apache/commons/rdf/api/Literal.java @@ -18,6 +18,7 @@ package org.apache.commons.rdf.api; import java.io.Serializable; +import java.util.Locale; import java.util.Objects; import java.util.Optional; @@ -72,7 +73,13 @@ public interface Literal extends RDFTerm { * <a href="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" * >http://www.w3.org/1999/02/22-rdf-syntax-ns#langString</a>, this method * must return {@link Optional#empty()}. 
- * + * <p> + * The value space of language tags is always in lower case; because + * RDF implementations MAY, but need not, convert all language tags to lower case, + * safe comparisons of language tags should be done using + * {@link String#toLowerCase(Locale)} with the locale + * {@link Locale#ROOT}. + * <p> * Implementation note: If your application requires {@link Serializable} * objects, it is best not to store an {@link Optional} in a field. It is * recommended to use {@link Optional#ofNullable(Object)} to create the @@ -80,8 +87,8 @@ public interface Literal extends RDFTerm { * * @return The {@link Optional} language tag for this literal. If * {@link Optional#isPresent()} returns true, the value returned by - * {@link Optional#get()} must be a non-empty string conforming to - * BCP47. + * {@link Optional#get()} must be a non-empty language tag string + * conforming to BCP47. * @see <a href= * "http://www.w3.org/TR/rdf11-concepts/#dfn-language-tag">RDF-1.1 * Literal language tag</a> @@ -89,14 +96,20 @@ public interface Literal extends RDFTerm { Optional<String> getLanguageTag(); /** - * Check it this Literal is equal to another Literal. <blockquote> + * Check if this Literal is equal to another Literal. + * <blockquote> * <a href="http://www.w3.org/TR/rdf11-concepts/#dfn-literal-term">Literal - * term equality</a>: Two literals are term-equal (the same RDF literal) if + * term equality</a>: + * Two literals are term-equal (the same RDF literal) if * and only if the two lexical forms, the two datatype IRIs, and the two * language tags (if any) compare equal, character by character. Thus, two * literals can have the same value without being the same RDF term. * </blockquote> - * + * As the value space for language tags is lower case, if they are present, + * they MUST be compared character by character + * using the equivalent of {@link String#toLowerCase(java.util.Locale)} with + * the locale {@link Locale#ROOT}. 
+ * <p> * Implementations MUST also override {@link #hashCode()} so that two equal * Literals produce the same hash code. * @@ -114,7 +127,7 @@ public interface Literal extends RDFTerm { * The returned hash code MUST be equal to the result of * {@link Objects#hash(Object...)} with the arguments * {@link #getLexicalForm()}, {@link #getDatatype()}, - * {@link #getLanguageTag()}. + * {@link #getLanguageTag()}<code>.map(s->s.toLowerCase(Locale.ROOT))</code>. * <p> * This method MUST be implemented in conjunction with * {@link #equals(Object)} so that two equal Literals produce the same hash http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/api/src/test/java/org/apache/commons/rdf/api/AbstractRDFTest.java ---------------------------------------------------------------------- diff --git a/api/src/test/java/org/apache/commons/rdf/api/AbstractRDFTest.java b/api/src/test/java/org/apache/commons/rdf/api/AbstractRDFTest.java index 85e7b61..5efcee1 100644 --- a/api/src/test/java/org/apache/commons/rdf/api/AbstractRDFTest.java +++ b/api/src/test/java/org/apache/commons/rdf/api/AbstractRDFTest.java @@ -17,13 +17,13 @@ */ package org.apache.commons.rdf.api; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.*; +import java.util.Locale; import java.util.Objects; +import java.util.Optional; +import org.junit.Assume; import org.junit.Before; import org.junit.Test; @@ -139,7 +139,7 @@ public abstract class AbstractRDFTest { assertEquals("<http://example.com/vocab#term>", term.ntriplesString()); // and now for the international fun! 
- + // make sure this file is edited/compiled as UTF-8 final IRI latin1 = factory.createIRI("http://accént.example.com/première"); assertEquals("http://accént.example.com/première", latin1.getIRIString()); assertEquals("<http://accént.example.com/première>", latin1.ntriplesString()); @@ -194,6 +194,114 @@ public abstract class AbstractRDFTest { assertEquals("\"Herbert Van de Sompel\"@vls", vls.ntriplesString()); } + public void testCreateLiteralLangCaseInsensitive() throws Exception { + // COMMONSRDF-51: Literal langtag may not be in lowercase, but + // must be COMPARED (aka .equals and .hashCode()) in lowercase + // as the language space is lower case. + final Literal lower = factory.createLiteral("Hello", "en-gb"); + final Literal upper = factory.createLiteral("Hello", "EN-GB"); + final Literal mixed = factory.createLiteral("Hello", "en-GB"); + + + assertEquals("en-gb", lower.getLanguageTag().get()); + + // NOTE: the RDF framework is free to lowercase the language tag + // or leave it as-is, so we can't assume: + /* + assertEquals("en-gb", upper.getLanguageTag().get()); + assertEquals("en-gb", mixed.getLanguageTag().get()); + */ + // ..unless we do a case-insensitive comparison: + assertEquals("en-gb", + upper.getLanguageTag().get().toLowerCase(Locale.ROOT)); + assertEquals("en-gb", + mixed.getLanguageTag().get().toLowerCase(Locale.ROOT)); + + // However these should all be true + assertEquals(lower, lower); + assertEquals(lower, upper); + assertEquals(lower, mixed); + assertEquals(upper, lower); + assertEquals(upper, upper); + assertEquals(upper, mixed); + assertEquals(mixed, lower); + assertEquals(mixed, upper); + assertEquals(mixed, mixed); + + // And then by java.lang.Object contract, also the hashcode: + assertEquals(lower.hashCode(), upper.hashCode()); + assertEquals(lower.hashCode(), mixed.hashCode()); + } + + @Test + public void testCreateLiteralLangCaseInsensitiveOther() throws Exception { + // COMMONSRDF-51: Ensure the Literal is using case insensitive 
+ // comparison against more 'liberal' literal implementations + // which may not have done .toLowerCase() in their constructor + final Literal lower = factory.createLiteral("Hello", "en-gb"); + final Literal upper = factory.createLiteral("Hello", "EN-GB"); + final Literal mixed = factory.createLiteral("Hello", "en-GB"); + + Literal otherLiteral = new Literal() { + @Override + public String ntriplesString() { + return "Hello@en-GB"; + } + @Override + public String getLexicalForm() { + return "Hello"; + } + @Override + public Optional<String> getLanguageTag() { + return Optional.of("en-GB"); + } + @Override + public IRI getDatatype() { + return factory.createIRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"); + } + @Override + public boolean equals(Object obj) { + throw new RuntimeException("Wrong way comparison of literal"); + } + }; + + assertEquals(mixed, otherLiteral); + assertEquals(lower, otherLiteral); + assertEquals(upper, otherLiteral); + } + + @Test + public void testCreateLiteralLangCaseInsensitiveInTurkish() throws Exception { + // COMMONSRDF-51: Special test for Turkish issue where + // "I".toLowerCase() != "i" + // See also: + // https://garygregory.wordpress.com/2015/11/03/java-lowercase-conversion-turkey/ + Locale defaultLocale = Locale.getDefault(); + try { + Locale turkish = Locale.forLanguageTag("TR"); + Locale.setDefault(turkish); + Assume.assumeFalse("FI".toLowerCase().equals("fi")); + + final Literal lower = factory.createLiteral("moi", "fi"); + final Literal upper = factory.createLiteral("moi", "FI"); + final Literal mixed = factory.createLiteral("moi", "fI"); + + assertEquals(lower, lower); + assertEquals(lower, upper); + assertEquals(lower, mixed); + assertEquals(upper, lower); + assertEquals(upper, upper); + assertEquals(upper, mixed); + assertEquals(mixed, lower); + assertEquals(mixed, upper); + assertEquals(mixed, mixed); + assertEquals(lower.hashCode(), upper.hashCode()); + assertEquals(lower.hashCode(), mixed.hashCode()); 
} finally { + Locale.setDefault(defaultLocale); + } + } + @Test public void testCreateLiteralString() throws Exception { final Literal example = factory.createLiteral("Example", http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/jena/src/main/java/org/apache/commons/rdf/jena/impl/JenaLiteralImpl.java ---------------------------------------------------------------------- diff --git a/jena/src/main/java/org/apache/commons/rdf/jena/impl/JenaLiteralImpl.java b/jena/src/main/java/org/apache/commons/rdf/jena/impl/JenaLiteralImpl.java index faee060..8a0c9ee 100644 --- a/jena/src/main/java/org/apache/commons/rdf/jena/impl/JenaLiteralImpl.java +++ b/jena/src/main/java/org/apache/commons/rdf/jena/impl/JenaLiteralImpl.java @@ -18,6 +18,7 @@ package org.apache.commons.rdf.jena.impl; +import java.util.Locale; import java.util.Objects; import java.util.Optional; @@ -37,6 +38,10 @@ class JenaLiteralImpl extends AbstractJenaRDFTerm implements JenaLiteral { throw new IllegalArgumentException("Node is not a literal: " + node); } } + + private static String lowerCase(String langTag) { + return langTag.toLowerCase(Locale.ROOT); + } @Override public boolean equals(final Object other) { @@ -50,8 +55,10 @@ class JenaLiteralImpl extends AbstractJenaRDFTerm implements JenaLiteral { return false; } final Literal literal = (Literal) other; - return getLexicalForm().equals(literal.getLexicalForm()) && getLanguageTag().equals(literal.getLanguageTag()) - && getDatatype().equals(literal.getDatatype()); + return getLexicalForm().equals(literal.getLexicalForm()) && + getDatatype().equals(literal.getDatatype()) && + getLanguageTag().map(JenaLiteralImpl::lowerCase).equals( + literal.getLanguageTag().map(JenaLiteralImpl::lowerCase)); } @Override @@ -75,6 +82,6 @@ class JenaLiteralImpl extends AbstractJenaRDFTerm implements JenaLiteral { @Override public int hashCode() { - return Objects.hash(getLexicalForm(), getDatatype(), getLanguageTag()); + return Objects.hash(getLexicalForm(), 
getDatatype(), getLanguageTag().map(JenaLiteralImpl::lowerCase)); } } http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdLiteral.java ---------------------------------------------------------------------- diff --git a/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdLiteral.java b/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdLiteral.java index 2414087..0d63c29 100644 --- a/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdLiteral.java +++ b/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdLiteral.java @@ -17,6 +17,7 @@ */ package org.apache.commons.rdf.jsonldjava; +import java.util.Locale; import java.util.Objects; import java.util.Optional; @@ -38,6 +39,10 @@ class JsonLdLiteralImpl extends JsonLdTermImpl implements JsonLdLiteral { } } + private static String lowerCase(String langTag) { + return langTag.toLowerCase(Locale.ROOT); + } + @Override public String ntriplesString() { final StringBuilder sb = new StringBuilder(); @@ -76,9 +81,8 @@ class JsonLdLiteralImpl extends JsonLdTermImpl implements JsonLdLiteral { @Override public int hashCode() { - // Should be the same as - // Objects.hash(getLexicalForm(), getDatatype(), getLanguageTag()); - return Objects.hash(node.getValue(), node.getDatatype(), node.getLanguage()); + return Objects.hash(node.getValue(), node.getDatatype(), + getLanguageTag().map(JsonLdLiteralImpl::lowerCase)); } @Override @@ -90,7 +94,8 @@ class JsonLdLiteralImpl extends JsonLdTermImpl implements JsonLdLiteral { if (obj instanceof Literal) { final Literal other = (Literal) obj; return getLexicalForm().equals(other.getLexicalForm()) && getDatatype().equals(other.getDatatype()) - && getLanguageTag().equals(other.getLanguageTag()); + && getLanguageTag().map(JsonLdLiteralImpl::lowerCase) + .equals(other.getLanguageTag().map(JsonLdLiteralImpl::lowerCase)); } return false; 
http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/impl/LiteralImpl.java ---------------------------------------------------------------------- diff --git a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/impl/LiteralImpl.java b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/impl/LiteralImpl.java index 16d70b8..253b645 100644 --- a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/impl/LiteralImpl.java +++ b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/impl/LiteralImpl.java @@ -17,6 +17,7 @@ */ package org.apache.commons.rdf.rdf4j.impl; +import java.util.Locale; import java.util.Objects; import java.util.Optional; @@ -32,6 +33,10 @@ final class LiteralImpl extends AbstractRDFTerm<org.eclipse.rdf4j.model.Literal> super(literal); } + private static String lowerCase(String langTag) { + return langTag.toLowerCase(Locale.ROOT); + } + @Override public boolean equals(final Object obj) { if (obj == this) { @@ -39,9 +44,10 @@ final class LiteralImpl extends AbstractRDFTerm<org.eclipse.rdf4j.model.Literal> } if (obj instanceof org.apache.commons.rdf.api.Literal) { final org.apache.commons.rdf.api.Literal other = (org.apache.commons.rdf.api.Literal) obj; - return getLexicalForm().equals(other.getLexicalForm()) && getDatatype().equals(other.getDatatype()) - && getLanguageTag().equals(other.getLanguageTag()); - + return getLexicalForm().equals(other.getLexicalForm()) && + getDatatype().equals(other.getDatatype()) && + getLanguageTag().map(LiteralImpl::lowerCase).equals( + other.getLanguageTag().map(LiteralImpl::lowerCase)); } return false; } @@ -63,7 +69,8 @@ final class LiteralImpl extends AbstractRDFTerm<org.eclipse.rdf4j.model.Literal> @Override public int hashCode() { - return Objects.hash(value.getLabel(), value.getDatatype(), value.getLanguage()); + return Objects.hash(value.getLabel(), value.getDatatype(), + getLanguageTag().map(LiteralImpl::lowerCase)); } @Override 
http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/simple/src/main/java/org/apache/commons/rdf/simple/LiteralImpl.java ---------------------------------------------------------------------- diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/LiteralImpl.java b/simple/src/main/java/org/apache/commons/rdf/simple/LiteralImpl.java index 3cca4c6..763a629 100644 --- a/simple/src/main/java/org/apache/commons/rdf/simple/LiteralImpl.java +++ b/simple/src/main/java/org/apache/commons/rdf/simple/LiteralImpl.java @@ -52,7 +52,7 @@ final class LiteralImpl implements Literal, SimpleRDF.SimpleRDFTerm { public LiteralImpl(final String literal, final String languageTag) { this.lexicalForm = Objects.requireNonNull(literal); - this.languageTag = Objects.requireNonNull(languageTag).toLowerCase(Locale.ENGLISH); + this.languageTag = Objects.requireNonNull(lowerCase(languageTag)); if (languageTag.isEmpty()) { // TODO: Check against // http://www.w3.org/TR/n-triples/#n-triples-grammar @@ -116,6 +116,10 @@ final class LiteralImpl implements Literal, SimpleRDF.SimpleRDFTerm { return Objects.hash(lexicalForm, dataType, languageTag); } + private static String lowerCase(String langTag) { + return langTag.toLowerCase(Locale.ROOT); + } + @Override public boolean equals(final Object obj) { if (this == obj) { @@ -126,7 +130,7 @@ final class LiteralImpl implements Literal, SimpleRDF.SimpleRDFTerm { } final Literal literal = (Literal) obj; return getDatatype().equals(literal.getDatatype()) && getLexicalForm().equals(literal.getLexicalForm()) - && getLanguageTag().equals(literal.getLanguageTag()); + && getLanguageTag().equals(literal.getLanguageTag().map(LiteralImpl::lowerCase)); } }
