This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git


The following commit(s) were added to refs/heads/main by this push:
     new 5ae97c93cb GH-3835: Fix for canonicalization of language tags
5ae97c93cb is described below

commit 5ae97c93cb1b5e6daa7bc7925990d451f21e0a41
Author: Andy Seaborne <[email protected]>
AuthorDate: Tue Apr 7 19:02:58 2026 +0100

    GH-3835: Fix for canonicalization of language tags
---
 .../jena/riot/writer/c14n/NQuadsWriter_C14N.java   |  2 +-
 .../jena/riot/writer/c14n/NTriplesWriter_C14N.java |  2 +-
 .../jena/riot/writer/c14n/NodeFormatter_C14N.java  | 10 ++++--
 .../java/org/apache/jena/riot/out/TestNodeFmt.java | 37 ++++++++++++++++------
 4 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NQuadsWriter_C14N.java b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NQuadsWriter_C14N.java
index 700dde4c59..86c17dc2bc 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NQuadsWriter_C14N.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NQuadsWriter_C14N.java
@@ -27,6 +27,6 @@ import org.apache.jena.riot.writer.NQuadsWriter;
 public class NQuadsWriter_C14N extends NQuadsWriter {
     @Override
     protected NodeFormatter createNodeFormatter() {
-        return new NodeFormatter_C14N(charSpace);
+        return new NodeFormatter_C14N();
     }
 }
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NTriplesWriter_C14N.java b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NTriplesWriter_C14N.java
index 2894eaa51a..4b5604dd15 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NTriplesWriter_C14N.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NTriplesWriter_C14N.java
@@ -27,6 +27,6 @@ import org.apache.jena.riot.writer.NTriplesWriter;
 public class NTriplesWriter_C14N extends NTriplesWriter {
     @Override
     protected NodeFormatter createNodeFormatter() {
-        return new NodeFormatter_C14N(charSpace);
+        return new NodeFormatter_C14N();
     }
 }
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NodeFormatter_C14N.java b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NodeFormatter_C14N.java
index 4a1b008759..ea15985aa7 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NodeFormatter_C14N.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/writer/c14n/NodeFormatter_C14N.java
@@ -24,6 +24,7 @@ package org.apache.jena.riot.writer.c14n;
 import org.apache.jena.atlas.io.AWriter;
 import org.apache.jena.atlas.lib.CharSpace;
 import org.apache.jena.atlas.lib.EscapeStr;
+import org.apache.jena.atlas.lib.Lib;
 import org.apache.jena.riot.out.NodeFormatterBase;
 import org.apache.jena.riot.out.quoted.QuotedStringOutput;
 import org.apache.jena.riot.out.quoted.QuotedStringOutputNT;
@@ -37,7 +38,7 @@ public class NodeFormatter_C14N extends NodeFormatterBase
 
     public NodeFormatter_C14N() { this(CharSpace.UTF8); }
 
-    public NodeFormatter_C14N(CharSpace charSpace) {
+    protected NodeFormatter_C14N(CharSpace charSpace) {
         quotedStringProc = new QuotedStringOutputNT(charSpace) {
             @Override
             public void writeStr(AWriter writer, String str) {
@@ -83,15 +84,18 @@ public class NodeFormatter_C14N extends NodeFormatterBase
     public void formatLitLang(AWriter w, String lex, String langTag) {
         writeEscaped(w, lex);
         w.print('@');
-        w.print(langTag);
+        String lcLangTag = Lib.lowercase(langTag);
+        w.print(lcLangTag);
     }
 
     @Override
     public void formatLitLangDir(AWriter w, String lex, String langTag, String direction) {
         writeEscaped(w, lex);
         w.print('@');
-        w.print(langTag);
+        String lcLangTag = Lib.lowercase(langTag);
+        w.print(lcLangTag);
         w.print("--");
+        // direction is always lowercase anyway.
         w.print(direction);
     }
 
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/out/TestNodeFmt.java b/jena-arq/src/test/java/org/apache/jena/riot/out/TestNodeFmt.java
index d7733263be..866edda51d 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/out/TestNodeFmt.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/out/TestNodeFmt.java
@@ -29,8 +29,10 @@ import org.junit.jupiter.api.Test;
 import org.apache.jena.atlas.io.StringWriterI;
 import org.apache.jena.atlas.lib.CharSpace;
 import org.apache.jena.graph.Node;
+import org.apache.jena.graph.NodeFactory;
 import org.apache.jena.riot.system.PrefixMap;
 import org.apache.jena.riot.system.PrefixMapFactory;
+import org.apache.jena.riot.writer.c14n.NodeFormatter_C14N;
 import org.apache.jena.sparql.core.Var;
 import org.apache.jena.sparql.util.NodeFactoryExtra;
 
@@ -46,16 +48,17 @@ public class TestNodeFmt
     private static NodeFormatter nodeFormatterNTutf8 = new NodeFormatterNT(CharSpace.UTF8);
     private static NodeFormatter nodeFormatterNTascii = new NodeFormatterNT(CharSpace.ASCII);
     private static NodeFormatter nodeFormatterTTL = new NodeFormatterTTL(base, prefixMap);
+    private static NodeFormatter nodeFormatterC14N = new NodeFormatter_C14N();
 
     public static void test(NodeFormatter nodeFormatter, String str)
     {
         test(nodeFormatter, str, str);
     }
 
-    public static void test(NodeFormatter nodeFormatter, String nStr , String str)
+    public static void test(NodeFormatter nodeFormatter, String inputStr , String expectedStr)
     {
-        Node n = NodeFactoryExtra.parseNode(nStr);
-        test(nodeFormatter, n, str);
+        Node n = NodeFactoryExtra.parseNode(inputStr);
+        test(nodeFormatter, n, expectedStr);
     }
 
     public static void test(NodeFormatter nodeFormatter, Node n , String str)
@@ -83,13 +86,10 @@ public class TestNodeFmt
 
     @Test public void nodefmt_nt_10()  { test(nodeFormatterNTutf8, "'Ω'", "\"Ω\""); }
     @Test public void nodefmt_nt_11()  { test(nodeFormatterNTascii, "'Ω'", "\"\\u03A9\""); }
-
-    @Test public void nodefmt_nt_12()        { test(nodeFormatterNTascii,"<http://example/>"); }
-    @Test public void nodefmt_nt_13()        { test(nodeFormatterNTascii, "\"abc\"^^<http://example/dt>"); }
-
-    @Test public void nodefmt_nt_14()        { test(nodeFormatterNTascii, "'é'", "\"\\u00E9\""); }
-
-    @Test public void nodefmt_nt_15()        { test(nodeFormatterNTascii, "'\\n\\t\\f'", "\"\\n\\t\\f\""); }
+    @Test public void nodefmt_nt_12()  { test(nodeFormatterNTascii,"<http://example/>"); }
+    @Test public void nodefmt_nt_13()  { test(nodeFormatterNTascii, "\"abc\"^^<http://example/dt>"); }
+    @Test public void nodefmt_nt_14()  { test(nodeFormatterNTascii, "'é'", "\"\\u00E9\""); }
+    @Test public void nodefmt_nt_15()  { test(nodeFormatterNTascii, "'\\n\\t\\f'", "\"\\n\\t\\f\""); }
 
     // RDF 1.1 sensitive.
     // xsd:strings output without ^^
@@ -103,6 +103,23 @@ public class TestNodeFmt
         test(nodeFormatterNTutf8, "'abc'^^rdf:langString",  "\"abc\"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>");
     }
 
+    @Test public void nodefmt_rdf12_01() {
+        test(nodeFormatterNTutf8, "'abc'^^rdf:dirLangString",  "\"abc\"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#dirLangString>");
+    }
+
+    // C14N -- Canonicalization. Java escaping makes this messy.
+    private static Node blankNode = NodeFactory.createBlankNode("ABC");
+
+    @Test public void nodefmt_c14n_00()        { test(nodeFormatterC14N, "'--\\u0009--'", "\"--\\t--\""); }
+    @Test public void nodefmt_c14n_01()        { test(nodeFormatterC14N, "'--\\u0008\\u0009\\u000A\\u000C\\u000D\\u0022\\u005C--'", "\"--\\b\\t\\n\\f\\r\\\"\\\\--\""); }
+    @Test public void nodefmt_c14n_02()        { test(nodeFormatterC14N, "'--\\b\\t\\n\\f\\r\\\"\\\\--'", "\"--\\b\\t\\n\\f\\r\\\"\\\\--\""); }
+    @Test public void nodefmt_c14n_03()        { test(nodeFormatterC14N, "'xyz'@EN-GB--ltr", "\"xyz\"@en-gb--ltr"); }
+    @Test public void nodefmt_c14n_04()        { test(nodeFormatterC14N, 
"'\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u000B\\u007F'",
+                                                                         
"\"\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u000B\\u007F\""); }
+    @Test public void nodefmt_c14n_05()        { test(nodeFormatterC14N, 
"'\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001F'",
+                                                                         
"\"\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001F\"");
 }
+    @Test public void nodefmt_c14n_06()        { test(nodeFormatterC14N, blankNode, "_:ABC"); }
+
     @Test public void nodefmt_ttl_01()  { test(nodeFormatterTTL, "?x"); }
     @Test public void nodefmt_ttl_02()  { test(nodeFormatterTTL, "?xyz"); }
     @Test public void nodefmt_ttl_03()  { test(nodeFormatterTTL, Var.alloc(""), "?"); }

Reply via email to