This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit 8e76a9fdd27b7fff17ef61facfcbb70646359083 Author: Andy Seaborne <[email protected]> AuthorDate: Tue Jul 22 18:32:38 2025 +0100 GH-3335: Prefix determination from @context --- .../org/apache/jena/riot/lang/LangJSONLD11.java | 102 +++++++++++++-------- .../java/org/apache/jena/riot/lang/TS_Lang.java | 2 + .../jena/riot/lang/TestLangJsonLD_prefixes.java | 97 ++++++++++++++++++++ 3 files changed, 162 insertions(+), 39 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/LangJSONLD11.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/LangJSONLD11.java index 4a7a1d8227..d960bdb2e7 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/LangJSONLD11.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/LangJSONLD11.java @@ -33,10 +33,7 @@ import com.apicatalog.rdf.RdfDataset; import com.apicatalog.rdf.api.RdfConsumerException; import com.apicatalog.rdf.api.RdfQuadConsumer; -import jakarta.json.JsonObject; -import jakarta.json.JsonString; -import jakarta.json.JsonStructure; -import jakarta.json.JsonValue; +import jakarta.json.*; import jakarta.json.stream.JsonLocation; import org.apache.jena.atlas.logging.Log; import org.apache.jena.atlas.web.ContentType; @@ -44,6 +41,7 @@ import org.apache.jena.datatypes.RDFDatatype; import org.apache.jena.datatypes.TypeMapper; import org.apache.jena.graph.Node; import org.apache.jena.graph.Triple; +import org.apache.jena.irix.IRIs; import org.apache.jena.riot.Lang; import org.apache.jena.riot.ReaderRIOT; import org.apache.jena.riot.RiotException; @@ -106,7 +104,6 @@ public class LangJSONLD11 implements ReaderRIOT { Document document = JsonDocument.of(in); read(document, baseURI, output, context); } catch (JsonLdError ex) { - ex.printStackTrace(); handleJsonLdError(ex); } catch (Exception ex) { errorHandler.error(ex.getMessage(), -1, -1); @@ -131,7 +128,7 @@ public class LangJSONLD11 implements ReaderRIOT { * JSON-LD does not define prefixes. 
* <p> * The use of "prefix:localname" happens for any definition of "prefix" in the - * {@literal @context} even if intended for a URI e.g a property. + * {@literal @context} even if intended for a URI e.g. a property. * </p> * <p> * We could extract any {"key" : "value"} from the context but we add a @@ -141,54 +138,80 @@ public class LangJSONLD11 implements ReaderRIOT { * <p> * In addition, {@literal @vocab} becomes prefix "". * </p> + * <p> + * The code assumes the structure is well-formed - it passed JSON-LD parsing by Titanium. + * </p> */ private static void extractPrefixes(Document document, BiConsumer<String, String> action) { try { - JsonStructure js = document.getJsonContent().orElseThrow(); - switch (js.getValueType()) { - case ARRAY: - extractPrefixes(js, action); - break; - case OBJECT: - JsonValue jv = js.asJsonObject().get(Keywords.CONTEXT); - extractPrefixes(jv, action); - break; - default: - break; - } + + JsonStructure jsonStructure = document.getJsonContent().orElse(null); + if ( jsonStructure == null ) + return; + extractPrefixesValue(jsonStructure, action); } catch (Throwable ex) { Log.warn(LangJSONLD11.class, "Unexpected problem while extracting prefixes: " + ex.getMessage(), ex); } } - private static void extractPrefixes(JsonValue jsonValue, BiConsumer<String, String> action) { - if (jsonValue == null) - return; - // JSON-LD 1.1 section 9.4 + /** + * <a href="https://www.w3.org/TR/json-ld11/#graph-objects">JSON-LD 1.1 section 9.4 Graph Objects</a>. + * Assume the structure is well-formed - it passed JSON-LD parsing by Titanium. 
+ */ + private static void extractPrefixesValue(JsonValue jsonValue, BiConsumer<String, String> action) { switch (jsonValue.getValueType()) { - case ARRAY: - jsonValue.asJsonArray().forEach(jv -> extractPrefixes(jv, action)); - break; - case OBJECT: - extractPrefixesCxtDefn(jsonValue.asJsonObject(), action); - break; - case NULL: - break; // We are only interested in prefixes - case STRING: - break; // We are only interested in prefixes - default: - break; + case ARRAY -> extractPrefixesArray(jsonValue.asJsonArray(), action); + case OBJECT -> extractPrefixesObject(jsonValue.asJsonObject(), action); + default->{} + } + } + + private static void extractPrefixesArray(JsonArray jsonArray, BiConsumer<String, String> action) { + jsonArray.forEach(jv -> extractPrefixesValue(jv, action)); + } + + /** + * <a href="https://www.w3.org/TR/json-ld11/#graph-objects">JSON-LD 1.1 section 9.4 Graph Objects</a>. + */ + private static void extractPrefixesObject(JsonObject jsonObject, BiConsumer<String, String> action) { + JsonValue contextValue = jsonObject.get(Keywords.CONTEXT); + if ( contextValue == null ) + return; + // If the graph object contains the @context key, its value MUST be null, an IRI reference, a context definition, or an array composed of any of these. + switch (contextValue.getValueType()) { + // Assuming the contextValue is valid (Titanium parsed it). + case ARRAY -> extractPrefixesContextArray(contextValue.asJsonArray(), action); + case OBJECT -> extractPrefixesContextDefinition(contextValue.asJsonObject(), action); + // URI or null. + default -> {} } } - private static void extractPrefixesCxtDefn(JsonObject jCxt, BiConsumer<String, String> action) { + // @context [ ] + private static void extractPrefixesContextArray(JsonArray jsonArray, BiConsumer<String, String> action) { + jsonArray.forEach(cxtArrayEntry -> { + switch (cxtArrayEntry.getValueType()) { + case OBJECT -> extractPrefixesContextDefinition(cxtArrayEntry.asJsonObject(), action); + // URI or null. 
+ default -> {} + } + }); + } + + /** + * Extract prefixes from a context definition. + * <p> + * "A context definition defines a local context in a node object." + */ + private static void extractPrefixesContextDefinition(JsonObject jCxt, BiConsumer<String, String> action) { + // Assume the local context is valid. Set<String> keys = jCxt.keySet(); keys.stream().forEach(k -> { // "@vocab" : "uri" // "shortName" : "uri" // "shortName" : { "@type":"@id" , "@id": "uri" } -- presumed to be a single property aliases, not a prefix. - JsonValue jvx = jCxt.get(k); - if (JsonValue.ValueType.STRING != jvx.getValueType()) + JsonValue jValue = jCxt.get(k); + if (JsonValue.ValueType.STRING != jValue.getValueType()) return; String prefix = k; if (Keywords.VOCAB.equals(k)) @@ -197,9 +220,10 @@ public class LangJSONLD11 implements ReaderRIOT { // Keyword, not @vocab. return; // Pragmatic filter: URI ends in "#" or "/" or ":" - String uri = JsonString.class.cast(jvx).getString(); + String uri = JsonString.class.cast(jValue).getString(); if (uri.endsWith("#") || uri.endsWith("/") || uri.endsWith(":")) { - action.accept(prefix, uri); + if ( IRIs.check(uri) ) + action.accept(prefix, uri); return; } }); diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java index e54203178a..17f22e283b 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/TS_Lang.java @@ -43,6 +43,7 @@ import org.apache.jena.riot.lang.rdfxml.TC_RIOT_RDFXML; , TestLangJsonLD.class , TestLangJsonLD_DocLoader.class + , TestLangJsonLD_prefixes.class , TestRDFXML_ReaderProperties.class , TestTriXReader.class @@ -51,6 +52,7 @@ import org.apache.jena.riot.lang.rdfxml.TC_RIOT_RDFXML; // Thrift is done in the "thrift" package , TestParserFactory.class , TestCollectorStream.class + , TC_RIOT_RDFXML.class }) diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangJsonLD_prefixes.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangJsonLD_prefixes.java new file mode 100644 index 0000000000..666aea89cb --- /dev/null +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangJsonLD_prefixes.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.riot.lang; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.StringReader; + +import com.apicatalog.jsonld.JsonLdError; + +import org.junit.jupiter.api.Test; + +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.riot.RiotException; + +public class TestLangJsonLD_prefixes { + // GH-3335 + // https://github.com/apache/jena/issues/3335 + @Test + public void prefixes_array_object() { + String jsonStr = """ + [ { "@context": "http://schema.org", + "@id": "https://data.example.org/dataset/entity", + "@type": "Thing", + "description": "A value that ends with a colon:" + } ] + """; + parseTestPrefixes(jsonStr); + } + + @Test + public void prefixes_bad_uri() { + // Bad prefix in the context - Titanium catches it. + String jsonStr = """ + { "@context": { "ns2": "A string that ends with a colon:" } , + "@id": "https://data.example.org/dataset/entity", + "ns2:abc": "ABC" + } + """; + RiotException ex = assertThrows(RiotException.class, ()->parseTestPrefixes(jsonStr)); + assertTrue(ex.getCause() instanceof JsonLdError); + } + + @Test + public void prefixes_array_context() { + String jsonStr = """ + { "@context": [ { "ns2": "urn:jena:" } , null ] , + "ns2:abc": "NS" + } + """; + parseTestPrefixes(jsonStr, "ns2"); + } + + @Test + public void prefixes_array_context_2() { + String jsonStr = """ + { + "@context": [ { "ns1": "urn:jena:ns1#" } , { "ns2": "urn:jena:ns2#" } ] , + "ns1:abc": "abc1", + "ns2:abc": "abc2" + } + """; + parseTestPrefixes(jsonStr, "ns1", "ns2"); + } + + private void parseTestPrefixes(String json, String... 
namespaces) { + Model model = ModelFactory.createDefaultModel(); + StringReader sr = new StringReader(json); + RDFDataMgr.read(model, sr, null, Lang.JSONLD11); + assertEquals(namespaces.length, model.getNsPrefixMap().size()); + for (String ns : namespaces ) { + assertNotNull(model.getNsPrefixURI(ns), ()->"Failed to find prefix '"+ns+"'"); + } + } +}
