This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push:
new 5ae97c93cb GH-3835: Fix for canonicalization of language tags
5ae97c93cb is described below
commit 5ae97c93cb1b5e6daa7bc7925990d451f21e0a41
Author: Andy Seaborne <[email protected]>
AuthorDate: Tue Apr 7 19:02:58 2026 +0100
GH-3835: Fix for canonicalization of language tags
---
.../jena/riot/writer/c14n/NQuadsWriter_C14N.java | 2 +-
.../jena/riot/writer/c14n/NTriplesWriter_C14N.java | 2 +-
.../jena/riot/writer/c14n/NodeFormatter_C14N.java | 10 ++++--
.../java/org/apache/jena/riot/out/TestNodeFmt.java | 37 ++++++++++++++++------
4 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NQuadsWriter_C14N.java b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NQuadsWriter_C14N.java
index 700dde4c59..86c17dc2bc 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NQuadsWriter_C14N.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NQuadsWriter_C14N.java
@@ -27,6 +27,6 @@ import org.apache.jena.riot.writer.NQuadsWriter;
public class NQuadsWriter_C14N extends NQuadsWriter {
@Override
protected NodeFormatter createNodeFormatter() {
- return new NodeFormatter_C14N(charSpace);
+ return new NodeFormatter_C14N();
}
}
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NTriplesWriter_C14N.java b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NTriplesWriter_C14N.java
index 2894eaa51a..4b5604dd15 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NTriplesWriter_C14N.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NTriplesWriter_C14N.java
@@ -27,6 +27,6 @@ import org.apache.jena.riot.writer.NTriplesWriter;
public class NTriplesWriter_C14N extends NTriplesWriter {
@Override
protected NodeFormatter createNodeFormatter() {
- return new NodeFormatter_C14N(charSpace);
+ return new NodeFormatter_C14N();
}
}
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NodeFormatter_C14N.java b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NodeFormatter_C14N.java
index 4a1b008759..ea15985aa7 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NodeFormatter_C14N.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NodeFormatter_C14N.java
@@ -24,6 +24,7 @@ package org.apache.jena.riot.writer.c14n;
import org.apache.jena.atlas.io.AWriter;
import org.apache.jena.atlas.lib.CharSpace;
import org.apache.jena.atlas.lib.EscapeStr;
+import org.apache.jena.atlas.lib.Lib;
import org.apache.jena.riot.out.NodeFormatterBase;
import org.apache.jena.riot.out.quoted.QuotedStringOutput;
import org.apache.jena.riot.out.quoted.QuotedStringOutputNT;
@@ -37,7 +38,7 @@ public class NodeFormatter_C14N extends NodeFormatterBase
public NodeFormatter_C14N() { this(CharSpace.UTF8); }
- public NodeFormatter_C14N(CharSpace charSpace) {
+ protected NodeFormatter_C14N(CharSpace charSpace) {
quotedStringProc = new QuotedStringOutputNT(charSpace) {
@Override
public void writeStr(AWriter writer, String str) {
@@ -83,15 +84,18 @@ public class NodeFormatter_C14N extends NodeFormatterBase
public void formatLitLang(AWriter w, String lex, String langTag) {
writeEscaped(w, lex);
w.print('@');
- w.print(langTag);
+ String lcLangTag = Lib.lowercase(langTag);
+ w.print(lcLangTag);
}
@Override
public void formatLitLangDir(AWriter w, String lex, String langTag, String
direction) {
writeEscaped(w, lex);
w.print('@');
- w.print(langTag);
+ String lcLangTag = Lib.lowercase(langTag);
+ w.print(lcLangTag);
w.print("--");
+ // direction is always lowercase anyway.
w.print(direction);
}
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/out/TestNodeFmt.java b/jena-arq/src/test/java/org/apache/jena/riot/out/TestNodeFmt.java
index d7733263be..866edda51d 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/out/TestNodeFmt.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/out/TestNodeFmt.java
@@ -29,8 +29,10 @@ import org.junit.jupiter.api.Test;
import org.apache.jena.atlas.io.StringWriterI;
import org.apache.jena.atlas.lib.CharSpace;
import org.apache.jena.graph.Node;
+import org.apache.jena.graph.NodeFactory;
import org.apache.jena.riot.system.PrefixMap;
import org.apache.jena.riot.system.PrefixMapFactory;
+import org.apache.jena.riot.writer.c14n.NodeFormatter_C14N;
import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.util.NodeFactoryExtra;
@@ -46,16 +48,17 @@ public class TestNodeFmt
private static NodeFormatter nodeFormatterNTutf8 = new
NodeFormatterNT(CharSpace.UTF8);
private static NodeFormatter nodeFormatterNTascii = new
NodeFormatterNT(CharSpace.ASCII);
private static NodeFormatter nodeFormatterTTL = new NodeFormatterTTL(base,
prefixMap);
+ private static NodeFormatter nodeFormatterC14N = new NodeFormatter_C14N();
public static void test(NodeFormatter nodeFormatter, String str)
{
test(nodeFormatter, str, str);
}
- public static void test(NodeFormatter nodeFormatter, String nStr , String
str)
+ public static void test(NodeFormatter nodeFormatter, String inputStr ,
String expectedStr)
{
- Node n = NodeFactoryExtra.parseNode(nStr);
- test(nodeFormatter, n, str);
+ Node n = NodeFactoryExtra.parseNode(inputStr);
+ test(nodeFormatter, n, expectedStr);
}
public static void test(NodeFormatter nodeFormatter, Node n , String str)
@@ -83,13 +86,10 @@ public class TestNodeFmt
@Test public void nodefmt_nt_10() { test(nodeFormatterNTutf8, "'Ω'",
"\"Ω\""); }
@Test public void nodefmt_nt_11() { test(nodeFormatterNTascii, "'Ω'",
"\"\\u03A9\""); }
-
- @Test public void nodefmt_nt_12() {
test(nodeFormatterNTascii,"<http://example/>"); }
- @Test public void nodefmt_nt_13() { test(nodeFormatterNTascii,
"\"abc\"^^<http://example/dt>"); }
-
- @Test public void nodefmt_nt_14() { test(nodeFormatterNTascii,
"'é'", "\"\\u00E9\""); }
-
- @Test public void nodefmt_nt_15() { test(nodeFormatterNTascii,
"'\\n\\t\\f'", "\"\\n\\t\\f\""); }
+ @Test public void nodefmt_nt_12() {
test(nodeFormatterNTascii,"<http://example/>"); }
+ @Test public void nodefmt_nt_13() { test(nodeFormatterNTascii,
"\"abc\"^^<http://example/dt>"); }
+ @Test public void nodefmt_nt_14() { test(nodeFormatterNTascii, "'é'",
"\"\\u00E9\""); }
+ @Test public void nodefmt_nt_15() { test(nodeFormatterNTascii,
"'\\n\\t\\f'", "\"\\n\\t\\f\""); }
// RDF 1.1 sensitive.
// xsd:strings output without ^^
@@ -103,6 +103,23 @@ public class TestNodeFmt
test(nodeFormatterNTutf8, "'abc'^^rdf:langString",
"\"abc\"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>");
}
+ @Test public void nodefmt_rdf12_01() {
+ test(nodeFormatterNTutf8, "'abc'^^rdf:dirLangString",
"\"abc\"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#dirLangString>");
+ }
+
+ // C14N -- Canonicalization. Java escaping makes this messy.
+ private static Node blankNode = NodeFactory.createBlankNode("ABC");
+
+ @Test public void nodefmt_c14n_00() { test(nodeFormatterC14N,
"'--\\u0009--'", "\"--\\t--\""); }
+ @Test public void nodefmt_c14n_01() { test(nodeFormatterC14N,
"'--\\u0008\\u0009\\u000A\\u000C\\u000D\\u0022\\u005C--'",
"\"--\\b\\t\\n\\f\\r\\\"\\\\--\""); }
+ @Test public void nodefmt_c14n_02() { test(nodeFormatterC14N,
"'--\\b\\t\\n\\f\\r\\\"\\\\--'", "\"--\\b\\t\\n\\f\\r\\\"\\\\--\""); }
+ @Test public void nodefmt_c14n_03() { test(nodeFormatterC14N,
"'xyz'@EN-GB--ltr", "\"xyz\"@en-gb--ltr"); }
+ @Test public void nodefmt_c14n_04() { test(nodeFormatterC14N,
"'\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u000B\\u007F'",
+
"\"\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u000B\\u007F\""); }
+ @Test public void nodefmt_c14n_05() { test(nodeFormatterC14N,
"'\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001F'",
+
"\"\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001F\"");
}
+ @Test public void nodefmt_c14n_06() { test(nodeFormatterC14N,
blankNode, "_:ABC"); }
+
@Test public void nodefmt_ttl_01() { test(nodeFormatterTTL, "?x"); }
@Test public void nodefmt_ttl_02() { test(nodeFormatterTTL, "?xyz"); }
@Test public void nodefmt_ttl_03() { test(nodeFormatterTTL,
Var.alloc(""), "?"); }