PR sarif-replay/123056 notes that when using sarif-replay to generate
HTML from a .sarif file containing an embedded "sarif:/" link we get
bogus output containing SGR codes.

The links in question come from GCC's sarif output for cross-referencing
event IDs within an execution path.

These links are JSON pointers.  I experimented with propertly supporting
the JSON Pointer spec (RFC 6901) within GCC, and I have a partially
working implementation which parses JSON pointers here, and, where
appropriate, reconstructs the pertinent event ID.

However, that feels too invasive to be pushing in stage 4.   Hence for
GCC 16, this patch simply skips the link part of "sarif:/" links in
sarif-replay, avoiding corrupt output, deferring the more ambitious
round-tripping fix to GCC 17.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r16-6931-g25910642d36709.

gcc/ChangeLog:
        PR sarif-replay/123056
        * libsarifreplay.cc (struct embedded_link): Move decl earlier.
        (sarif_replayer::append_embeddded_link): New.
        (sarif_replayer::make_plain_text_within_result_message): Move the
        link-replay logic to the above, and skip the link part of
        intra-sarif links.

gcc/testsuite/ChangeLog:
        PR sarif-replay/123056
        * sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif: New 
test.
        * sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py:
        New test script.
        * 
sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py:
        New test script.

Signed-off-by: David Malcolm <[email protected]>
---
 gcc/libsarifreplay.cc                         |  39 +++--
 .../3.11.6-embedded-links-pr123056.sarif      | 139 ++++++++++++++++++
 .../embedded-links-pr123056-check-html.py     |  25 ++++
 ...ed-links-pr123056-check-sarif-roundtrip.py |  14 ++
 4 files changed, 204 insertions(+), 13 deletions(-)
 create mode 100644 
gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif
 create mode 100644 
gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py
 create mode 100644 
gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py

diff --git a/gcc/libsarifreplay.cc b/gcc/libsarifreplay.cc
index 01d3a975f62..20a0aec3c7f 100644
--- a/gcc/libsarifreplay.cc
+++ b/gcc/libsarifreplay.cc
@@ -292,6 +292,12 @@ public:
   libgdiagnostics::message_buffer m_label;
 };
 
+struct embedded_link
+{
+  std::string text;
+  std::string destination;
+};
+
 using id_map = std::map<std::string, const json::string *>;
 
 class sarif_replayer
@@ -724,6 +730,10 @@ private:
     return sub;
   }
 
+  void
+  append_embeddded_link (libgdiagnostics::message_buffer &result,
+                        const embedded_link &link);
+
   /* The manager to replay the SARIF files to.  */
   libgdiagnostics::manager m_output_mgr;
 
@@ -1499,12 +1509,6 @@ maybe_consume_placeholder (const char *&iter_src, 
unsigned *out_arg_idx)
   return false;
 }
 
-struct embedded_link
-{
-  std::string text;
-  std::string destination;
-};
-
 /*  If ITER_SRC starts with an embedded link as per §3.11.6, advance ITER_SRC
     to immediately beyond the link, and return the link.
 
@@ -1578,6 +1582,21 @@ maybe_consume_embedded_link (const char *&iter_src)
   return std::make_unique<embedded_link> (std::move (result));
 }
 
+void
+sarif_replayer::append_embeddded_link (libgdiagnostics::message_buffer &result,
+                                      const embedded_link &link)
+{
+  /* We can't yet decode intra-sarif links, so simply use their text.  */
+  if (!strncmp (link.destination.c_str (), "sarif:/", strlen ("sarif:/")))
+    {
+      result += link.text.c_str ();
+      return;
+    }
+  result.begin_url (link.destination.c_str ());
+  result += link.text.c_str ();
+  result.end_url ();
+}
+
 /* Lookup the plain text string within a result.message (§3.27.11),
    and substitute for any placeholders (§3.11.5) and handle any
    embedded links (§3.11.6).
@@ -1662,13 +1681,7 @@ make_plain_text_within_result_message (const 
json::object *tool_component_obj,
            }
        }
       else if (auto link = maybe_consume_embedded_link (iter_src))
-       {
-         result.begin_url (link->destination.c_str ());
-         result += link->text.c_str ();
-         result.end_url ();
-         /* TODO: potentially could try to convert
-            intra-sarif links into event ids.  */
-       }
+       append_embeddded_link (result, *link);
       else
        {
          result += ch;
diff --git 
a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif
 
b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif
new file mode 100644
index 00000000000..cbf5f4af64b
--- /dev/null
+++ 
b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif
@@ -0,0 +1,139 @@
+/* { dg-additional-options 
"-fdiagnostics-add-output=experimental-html:file=3.11.6-embedded-links-pr123056.sarif.html,javascript=no"
 } */
+/* { dg-additional-options 
"-fdiagnostics-add-output=sarif:file=3.11.6-embedded-links-pr123056.sarif.roundtrip.sarif"
 } */
+
+{"$schema": 
"https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json";,
+ "version": "2.1.0",
+ "runs": [{"tool": {"driver": {"name": "GNU C23",
+                               "fullName": "GNU C23 (GCC) version 16.0.1 
20260114 (experimental) (x86_64-pc-linux-gnu)",
+                               "version": "16.0.1 20260114 (experimental)",
+                               "informationUri": "https://gcc.gnu.org/gcc-16/";,
+                               "rules": [{"id": "-Wanalyzer-malloc-leak",
+                                          "helpUri": 
"https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-Wanalyzer-malloc-leak"}]}},
+           "taxonomies": [{"name": "CWE",
+                           "version": "4.7",
+                           "organization": "MITRE",
+                           "shortDescription": {"text": "The MITRE Common 
Weakness Enumeration"},
+                           "taxa": [{"id": "401",
+                                     "helpUri": 
"https://cwe.mitre.org/data/definitions/401.html"}]}],
+           "invocations": [{"arguments": ["./cc1",
+                                          "-quiet",
+                                          "-iprefix",
+                                          
"/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/../lib/gcc/x86_64-pc-linux-gnu/16.0.1/",
+                                          "-isystem",
+                                          "./include",
+                                          "-isystem",
+                                          "./include-fixed",
+                                          "pr123056.c",
+                                          "-quiet",
+                                          "-dumpbase",
+                                          "pr123056.c",
+                                          "-dumpbase-ext",
+                                          ".c",
+                                          "-mtune=generic",
+                                          "-march=x86-64",
+                                          "-fanalyzer",
+                                          "-fdiagnostics-add-output=sarif",
+                                          
"-fdiagnostics-add-output=experimental-html",
+                                          "-o",
+                                          "pr123056.s"],
+                            "workingDirectory": {"uri": 
"/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc"},
+                            "startTimeUtc": "2026-01-16T17:43:19Z",
+                            "executionSuccessful": true,
+                            "toolExecutionNotifications": [],
+                            "endTimeUtc": "2026-01-16T17:43:19Z"}],
+           "originalUriBaseIds": {"PWD": {"uri": 
"file:///home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/"}},
+           "artifacts": [{"location": {"uri": "pr123056.c",
+                                       "uriBaseId": "PWD"},
+                          "sourceLanguage": "c",
+                          "contents": {"text": "void test (void)\n{\n  void *p 
= __builtin_malloc (1024);\n}\n"},
+                          "roles": ["analysisTarget",
+                                    "tracedFile"]}],
+           "results": [{"ruleId": "-Wanalyzer-malloc-leak",
+                        "taxa": [{"id": "401",
+                                  "toolComponent": {"name": "cwe"}}],
+                        "properties": {"gcc/analyzer/saved_diagnostic/sm": 
"malloc",
+                                       "gcc/analyzer/saved_diagnostic/ploc": 
{"enode": 5},
+                                       "gcc/analyzer/saved_diagnostic/var": 
"p_3",
+                                       "gcc/analyzer/saved_diagnostic/sval": 
"&HEAP_ALLOCATED_REGION(14)",
+                                       "gcc/analyzer/saved_diagnostic/state": 
"unchecked ({free})",
+                                       "gcc/analyzer/saved_diagnostic/idx": 0,
+                                       
"gcc/analyzer/saved_diagnostic/duplicates": [{"properties": 
{"gcc/analyzer/saved_diagnostic/sm": "malloc",
+                                                                               
                     "gcc/analyzer/saved_diagnostic/ploc": {"enode": 5},
+                                                                               
                     "gcc/analyzer/saved_diagnostic/var": "p_3",
+                                                                               
                     "gcc/analyzer/saved_diagnostic/sval": 
"&HEAP_ALLOCATED_REGION(14)",
+                                                                               
                     "gcc/analyzer/saved_diagnostic/state": "unchecked 
({free})",
+                                                                               
                     "gcc/analyzer/saved_diagnostic/idx": 1,
+                                                                               
                     "gcc/analyzer/pending_diagnostic/kind": "malloc_leak"}}],
+                                       "gcc/analyzer/pending_diagnostic/kind": 
"malloc_leak"},
+                        "level": "warning",
+                        "message": {"text": "leak of ‘p’"},
+                        "locations": [{"physicalLocation": 
{"artifactLocation": {"uri": "pr123056.c",
+                                                                               
  "uriBaseId": "PWD"},
+                                                            "region": 
{"startLine": 4,
+                                                                       
"startColumn": 1,
+                                                                       
"endColumn": 2},
+                                                            "contextRegion": 
{"startLine": 4,
+                                                                              
"snippet": {"text": "}\n"}}},
+                                       "logicalLocations": [{"index": 0,
+                                                             
"fullyQualifiedName": "test"}]}],
+                        "codeFlows": [{"threadFlows": [{"id": "main",
+                                                        "locations": 
[{"properties": {"gcc/analyzer/checker_event/emission_id": "(1)",
+                                                                               
       "gcc/analyzer/checker_event/kind": "state_change"},
+                                                                       
"location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c",
+                                                                               
                                               "uriBaseId": "PWD"},
+                                                                               
                          "region": {"startLine": 3,
+                                                                               
                                     "startColumn": 13,
+                                                                               
                                     "endColumn": 36},
+                                                                               
                          "contextRegion": {"startLine": 3,
+                                                                               
                                            "snippet": {"text": "  void *p = 
__builtin_malloc (1024);\n"}}},
+                                                                               
     "logicalLocations": [{"index": 0,
+                                                                               
                           "fullyQualifiedName": "test"}],
+                                                                               
     "message": {"text": "allocated here"}},
+                                                                       
"kinds": ["acquire",
+                                                                               
  "memory"],
+                                                                       
"nestingLevel": 1,
+                                                                       
"executionOrder": 1},
+                                                                      
{"properties": {"gcc/analyzer/checker_event/emission_id": "(2)",
+                                                                               
       "gcc/analyzer/checker_event/kind": "warning"},
+                                                                       
"location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c",
+                                                                               
                                               "uriBaseId": "PWD"},
+                                                                               
                          "region": {"startLine": 4,
+                                                                               
                                     "startColumn": 1,
+                                                                               
                                     "endColumn": 2},
+                                                                               
                          "contextRegion": {"startLine": 4,
+                                                                               
                                            "snippet": {"text": "}\n"}}},
+                                                                               
     "logicalLocations": [{"index": 0,
+                                                                               
                           "fullyQualifiedName": "test"}],
+                                                                               
     "message": {"text": "‘p’ leaks here; was allocated at 
[(1)](sarif:/runs/0/results/0/codeFlows/0/threadFlows/0/locations/0)"}},
+                                                                       
"kinds": ["danger"],
+                                                                       
"nestingLevel": 1,
+                                                                       
"executionOrder": 2}]}]}]}],
+           "logicalLocations": [{"name": "test",
+                                 "fullyQualifiedName": "test",
+                                 "decoratedName": "test",
+                                 "kind": "function",
+                                 "index": 0}]}]}
+
+/* { dg-begin-multiline-output "" }
+In function 'test':
+pr123056.c:4:1: warning: leak of ‘p’ [-Wanalyzer-malloc-leak]
+    4 | }
+      | ^
+  'test': events 1-2
+    3 |   void *p = __builtin_malloc (1024);
+      |             ^~~~~~~~~~~~~~~~~~~~~~~
+      |             |
+      |             (1) allocated here
+    4 | }
+      | ~            
+      | |
+      | (2) ‘p’ leaks here; was allocated at (1)
+   { dg-end-multiline-output "" } */
+
+/* Use a Python script to verify various properties about the generated
+   .html file:
+   { dg-final { run-html-pytest 3.11.6-embedded-links-pr123056.sarif 
"2.1.0-valid/embedded-links-pr123056-check-html.py" } } */
+
+/* Use a Python script to verify various properties about the *generated*
+   .sarif file:
+   { dg-final { run-sarif-pytest 
3.11.6-embedded-links-pr123056.sarif.roundtrip 
"2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py" } } */
diff --git 
a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py
 
b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py
new file mode 100644
index 00000000000..1e8069f2177
--- /dev/null
+++ 
b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py
@@ -0,0 +1,25 @@
+from htmltest import *
+
+import pytest
+
[email protected](scope='function', autouse=True)
+def html_tree():
+    return html_tree_from_env()
+
+def test_generated_html(html_tree):
+    root = html_tree.getroot ()
+    assert root.tag == make_tag('html')
+
+    head = root.find('xhtml:head', ns)
+    assert head is not None
+
+    diag = get_diag_by_index(html_tree, 0)
+
+    exec_path = diag.find("./xhtml:div[@id='execution-path']", ns)
+    assert exec_path is not None
+
+    label = exec_path.find('xhtml:label', ns)
+    assert label.text == 'Execution path with 2 events'
+
+    final_event = exec_path.find(".//xhtml:span[@id='gcc-diag-0-event-1']", ns)
+    assert final_event.text == '(2) ‘p’ leaks here; was allocated at (1)'
diff --git 
a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py
 
b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py
new file mode 100644
index 00000000000..5e45e2a0c7f
--- /dev/null
+++ 
b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py
@@ -0,0 +1,14 @@
+from sarif import *
+
+import pytest
+
[email protected](scope='function', autouse=True)
+def sarif():
+    return sarif_from_env()
+
+def test_roundtrip_of_url_in_generated_sarif(sarif):
+    result = get_result_by_index(sarif, 0)
+    assert result['level'] == 'warning'
+    assert result['message']['text'] == "leak of ‘p’"
+    assert 
(result['codeFlows'][0]['threadFlows'][0]['locations'][1]['location']['message']['text']
+            == "‘p’ leaks here; was allocated at (1)")
-- 
2.26.3

Reply via email to