Control: tags 1037206 + patch
Control: tags 1037206 + pending

Here is a proposed fix. I'll open a bullseye-pu bug next.

Corresponding git branch:
https://salsa.debian.org/smcv/appstream-glib/-/tree/debian/bullseye-proposed?ref_type=heads

Diff viewable at:
https://salsa.debian.org/smcv/appstream-glib/-/merge_requests/1
(not an MR against https://salsa.debian.org/pkgutopia-team/appstream-glib
because there isn't a bullseye branch there at the moment).

    smcv
diffstat for appstream-glib-0.7.18 appstream-glib-0.7.18

 debian/.gitignore                                                         |    1 
 debian/changelog                                                          |   10 
 debian/patches/Improve-handling-of-em-and-code-tags.patch                 |  220 ++++++++++
 debian/patches/Properly-initialize-AsNodeToXmlHelper.patch                |   34 +
 debian/patches/Support-em-code-tags.patch                                 |  118 +++++
 debian/patches/series                                                     |    4 
 debian/patches/trivial-Turn-is_-em-code-_text-fields-into-bitfields.patch |   26 +
 libappstream-glib/as-node.c                                               |  120 ++++-
 libappstream-glib/as-self-test.c                                          |   51 ++
 9 files changed, 552 insertions(+), 32 deletions(-)

diff -Nru appstream-glib-0.7.18/debian/changelog appstream-glib-0.7.18/debian/changelog
--- appstream-glib-0.7.18/debian/changelog	2020-12-21 23:14:10.000000000 +0000
+++ appstream-glib-0.7.18/debian/changelog	2023-06-07 19:25:59.000000000 +0100
@@ -1,3 +1,13 @@
+appstream-glib (0.7.18-1+deb11u1) bullseye; urgency=medium
+
+  * Add patches from upstream to cope with <em> and <code> in metadata.
+    Older versions of appstream-glib mis-parse upstream metadata that
+    contains <em> and <code>, causing flatpak 1.12.x or older to fail
+    to load the metadata now published by Flathub. The symptom is that
+    `flatpak search` fails. (Closes: #1037206, LP: #2023215)
+
+ -- Simon McVittie <s...@debian.org>  Wed, 07 Jun 2023 19:25:59 +0100
+
 appstream-glib (0.7.18-1) unstable; urgency=medium
 
   [ Matthias Klumpp ]
diff -Nru appstream-glib-0.7.18/debian/.gitignore appstream-glib-0.7.18/debian/.gitignore
--- appstream-glib-0.7.18/debian/.gitignore	1970-01-01 01:00:00.000000000 +0100
+++ appstream-glib-0.7.18/debian/.gitignore	2023-06-07 19:25:59.000000000 +0100
@@ -0,0 +1 @@
+*~
diff -Nru appstream-glib-0.7.18/debian/patches/Improve-handling-of-em-and-code-tags.patch appstream-glib-0.7.18/debian/patches/Improve-handling-of-em-and-code-tags.patch
--- appstream-glib-0.7.18/debian/patches/Improve-handling-of-em-and-code-tags.patch	1970-01-01 01:00:00.000000000 +0100
+++ appstream-glib-0.7.18/debian/patches/Improve-handling-of-em-and-code-tags.patch	2023-06-07 19:25:59.000000000 +0100
@@ -0,0 +1,220 @@
+From: "Jan Alexander Steffens (heftig)" <jan.steff...@gmail.com>
+Date: Fri, 15 Jul 2022 21:18:47 +0200
+Subject: Improve handling of <em> and <code> tags
+
+This is still not great code but at least somewhat an improvement. Tests
+were expanded to showcase the new behavior.
+
+I think, ideally, we would append opening/closing tags to the ancestor
+`p` or `li` node's cdata as soon as we encounter the start/end of an
+`em` or `code` element. This would then also handle empty elements
+correctly.
+
+Origin: https://github.com/hughsie/appstream-glib/pull/446
+Applied-upstream: 0.8.1, commit:674490bd54ff206f213ca4547db7fdb591a0fb3d
+Bug-Debian: https://bugs.debian.org/1037206
+---
+ libappstream-glib/as-node.c      | 108 +++++++++++++++++++++++----------------
+ libappstream-glib/as-self-test.c |  39 +++++++++++++-
+ 2 files changed, 101 insertions(+), 46 deletions(-)
+
+diff --git a/libappstream-glib/as-node.c b/libappstream-glib/as-node.c
+index 5e19337..655b947 100644
+--- a/libappstream-glib/as-node.c
++++ b/libappstream-glib/as-node.c
+@@ -674,6 +674,7 @@ as_node_end_element_cb (GMarkupParseContext *context,
+ 			GError             **error)
+ {
+ 	AsNodeToXmlHelper *helper = (AsNodeToXmlHelper *) user_data;
++	AsNodeData *data = helper->current->data;
+ 
+ 	/* do not create a child node for em and code tags */
+ 	if (g_strcmp0 (element_name, "em") == 0) {
+@@ -684,6 +685,42 @@ as_node_end_element_cb (GMarkupParseContext *context,
+ 		helper->is_code_text = 0;
+ 		return;
+ 	}
++
++	if (data->cdata != NULL) {
++		/* split up into lines and add each with spaces stripped */
++		if ((helper->flags & AS_NODE_FROM_XML_FLAG_LITERAL_TEXT) == 0) {
++			AsRefString *cdata = data->cdata;
++			data->cdata = as_node_reflow_text (cdata, strlen (cdata));
++			as_ref_string_unref (cdata);
++		}
++
++		/* intern commonly duplicated tag values and save a bit of memory */
++		if (data->is_tag_valid) {
++			AsNode *root = g_node_get_root (helper->current);
++			switch (data->tag) {
++			case AS_TAG_CATEGORY:
++			case AS_TAG_COMPULSORY_FOR_DESKTOP:
++			case AS_TAG_CONTENT_ATTRIBUTE:
++			case AS_TAG_DEVELOPER_NAME:
++			case AS_TAG_EXTENDS:
++			case AS_TAG_ICON:
++			case AS_TAG_ID:
++			case AS_TAG_KUDO:
++			case AS_TAG_LANG:
++			case AS_TAG_METADATA_LICENSE:
++			case AS_TAG_MIMETYPE:
++			case AS_TAG_PROJECT_GROUP:
++			case AS_TAG_PROJECT_LICENSE:
++			case AS_TAG_SOURCE_PKGNAME:
++			case AS_TAG_URL:
++				as_node_cdata_to_intern (root, data);
++				break;
++			default:
++				break;
++			}
++		}
++	}
++
+ 	helper->current = helper->current->parent;
+ }
+ 
+@@ -715,22 +752,9 @@ as_node_text_cb (GMarkupParseContext *context,
+ 	if (i >= text_len)
+ 		return;
+ 
+-	/* split up into lines and add each with spaces stripped */
+-	if (data->cdata != NULL) {
+-		/* support em and code tags */
+-		if (g_strcmp0 (as_tag_data_get_name (data), "p") == 0 ||
+-			g_strcmp0 (as_tag_data_get_name (data), "li") == 0) {
+-			g_autoptr(GString) str = g_string_new (data->cdata);
+-			as_ref_string_unref (data->cdata);
+-			if (helper->is_em_text)
+-				g_string_append_printf (str, "<em>%s</em>", text);
+-			else if (helper->is_code_text)
+-				g_string_append_printf (str, "<code>%s</code>", text);
+-			else
+-				g_string_append (str, text);
+-			data->cdata = as_ref_string_new_with_length (str->str, str->len);
+-			return;
+-		}
++	if (data->cdata != NULL &&
++	    g_strcmp0 (as_tag_data_get_name (data), "p") != 0 &&
++	    g_strcmp0 (as_tag_data_get_name (data), "li") != 0) {
+ 		g_set_error (error,
+ 			     AS_NODE_ERROR,
+ 			     AS_NODE_ERROR_INVALID_MARKUP,
+@@ -739,37 +763,33 @@ as_node_text_cb (GMarkupParseContext *context,
+ 			     data->cdata, text);
+ 		return;
+ 	}
+-	if ((helper->flags & AS_NODE_FROM_XML_FLAG_LITERAL_TEXT) > 0) {
+-		data->cdata = as_ref_string_new_with_length (text, text_len + 1);
+-	} else {
+-		data->cdata = as_node_reflow_text (text, (gssize) text_len);
+-	}
+ 
+-	/* intern commonly duplicated tag values and save a bit of memory */
+-	if (data->is_tag_valid && data->cdata != NULL) {
+-		AsNode *root = g_node_get_root (helper->current);
+-		switch (data->tag) {
+-		case AS_TAG_CATEGORY:
+-		case AS_TAG_COMPULSORY_FOR_DESKTOP:
+-		case AS_TAG_CONTENT_ATTRIBUTE:
+-		case AS_TAG_DEVELOPER_NAME:
+-		case AS_TAG_EXTENDS:
+-		case AS_TAG_ICON:
+-		case AS_TAG_ID:
+-		case AS_TAG_KUDO:
+-		case AS_TAG_LANG:
+-		case AS_TAG_METADATA_LICENSE:
+-		case AS_TAG_MIMETYPE:
+-		case AS_TAG_PROJECT_GROUP:
+-		case AS_TAG_PROJECT_LICENSE:
+-		case AS_TAG_SOURCE_PKGNAME:
+-		case AS_TAG_URL:
+-			as_node_cdata_to_intern (root, data);
+-			break;
+-		default:
+-			break;
++	/* support em and code tags */
++	if (helper->is_em_text || helper->is_code_text || data->cdata != NULL) {
++		g_autoptr(GString) str = g_string_new (NULL);
++
++		if (data->cdata != NULL) {
++			g_string_append (str, data->cdata);
++			as_ref_string_unref (data->cdata);
+ 		}
++
++		if (helper->is_em_text)
++			g_string_append (str, "<em>");
++		if (helper->is_code_text)
++			g_string_append (str, "<code>");
++
++		g_string_append_len (str, text, text_len);
++
++		if (helper->is_code_text)
++			g_string_append (str, "</code>");
++		if (helper->is_em_text)
++			g_string_append (str, "</em>");
++
++		data->cdata = as_ref_string_new_with_length (str->str, str->len);
++		return;
+ 	}
++
++	data->cdata = as_ref_string_new_with_length (text, text_len);
+ }
+ 
+ static void
+diff --git a/libappstream-glib/as-self-test.c b/libappstream-glib/as-self-test.c
+index 3886e4b..44b32ab 100644
+--- a/libappstream-glib/as-self-test.c
++++ b/libappstream-glib/as-self-test.c
+@@ -2866,6 +2866,15 @@ as_test_node_xml_func (void)
+ 			     "It now also supports <em>em</em> and <code>code</code> tags."
+ 			     "</p>"
+ 			     "</description>";
++	const gchar *valid_em_code_2 = "<description>"
++			     "<p><em>Emphasis</em> at the start of the paragraph</p>"
++			     "</description>";
++	const gchar *valid_em_code_empty = "<description>"
++			     "<p><em></em></p>"
++			     "</description>";
++	const gchar *valid_em_code_empty_2 = "<description>"
++			     "<p>empty <em></em> emphasis</p>"
++			     "</description>";
+ 	GError *error = NULL;
+ 	AsNode *n2;
+ 	AsNode *root;
+@@ -2936,8 +2945,34 @@ as_test_node_xml_func (void)
+ 
+ 	n2 = as_node_find (root, "description/p");
+ 	g_assert (n2 != NULL);
+-	printf ("<%s>\n", as_node_get_data (n2));
+-	g_assert_cmpstr (as_node_get_data (n2), ==, "It now also supports<em>em</em> and <code>code</code> tags.");
++	g_assert_cmpstr (as_node_get_data (n2), ==, "It now also supports <em>em</em> and <code>code</code> tags.");
++	as_node_unref (root);
++
++	root = as_node_from_xml (valid_em_code_2, 0, &error);
++	g_assert_no_error (error);
++	g_assert (root != NULL);
++
++	n2 = as_node_find (root, "description/p");
++	g_assert (n2 != NULL);
++	g_assert_cmpstr (as_node_get_data (n2), ==, "<em>Emphasis</em> at the start of the paragraph");
++	as_node_unref (root);
++
++	root = as_node_from_xml (valid_em_code_empty, 0, &error);
++	g_assert_no_error (error);
++	g_assert (root != NULL);
++
++	n2 = as_node_find (root, "description/p");
++	g_assert (n2 != NULL);
++	g_assert_cmpstr (as_node_get_data (n2), ==, NULL);
++	as_node_unref (root);
++
++	root = as_node_from_xml (valid_em_code_empty_2, 0, &error);
++	g_assert_no_error (error);
++	g_assert (root != NULL);
++
++	n2 = as_node_find (root, "description/p");
++	g_assert (n2 != NULL);
++	g_assert_cmpstr (as_node_get_data (n2), ==, "empty  emphasis");
+ 	as_node_unref (root);
+ 
+ 	/* keep comments */
diff -Nru appstream-glib-0.7.18/debian/patches/Properly-initialize-AsNodeToXmlHelper.patch appstream-glib-0.7.18/debian/patches/Properly-initialize-AsNodeToXmlHelper.patch
--- appstream-glib-0.7.18/debian/patches/Properly-initialize-AsNodeToXmlHelper.patch	1970-01-01 01:00:00.000000000 +0100
+++ appstream-glib-0.7.18/debian/patches/Properly-initialize-AsNodeToXmlHelper.patch	2023-06-07 19:25:59.000000000 +0100
@@ -0,0 +1,34 @@
+From: "Jan Alexander Steffens (heftig)" <jan.steff...@gmail.com>
+Date: Fri, 15 Jul 2022 20:33:50 +0200
+Subject: Properly initialize AsNodeToXmlHelper
+
+Bug: https://github.com/hughsie/appstream-glib/issues/445
+Origin: https://github.com/hughsie/appstream-glib/pull/446
+Applied-upstream: 0.8.1, commit:3870226a3585be4c31c1719248be0e17d789f3d7
+Bug-Debian: https://bugs.debian.org/1037206
+---
+ libappstream-glib/as-node.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/libappstream-glib/as-node.c b/libappstream-glib/as-node.c
+index b4159ea..c5eb8a1 100644
+--- a/libappstream-glib/as-node.c
++++ b/libappstream-glib/as-node.c
+@@ -826,7 +826,7 @@ as_node_from_xml_internal (const gchar *data, gssize data_sz,
+ 			   AsNodeFromXmlFlags flags,
+ 			   GError **error)
+ {
+-	AsNodeToXmlHelper helper;
++	AsNodeToXmlHelper helper = {0};
+ 	AsNode *root = NULL;
+ 	gboolean ret;
+ 	g_autoptr(GError) error_local = NULL;
+@@ -963,7 +963,7 @@ as_node_from_file (GFile *file,
+ 		   GCancellable *cancellable,
+ 		   GError **error)
+ {
+-	AsNodeToXmlHelper helper;
++	AsNodeToXmlHelper helper = {0};
+ 	GError *error_local = NULL;
+ 	AsNode *root = NULL;
+ 	const gchar *content_type = NULL;
diff -Nru appstream-glib-0.7.18/debian/patches/series appstream-glib-0.7.18/debian/patches/series
--- appstream-glib-0.7.18/debian/patches/series	1970-01-01 01:00:00.000000000 +0100
+++ appstream-glib-0.7.18/debian/patches/series	2023-06-07 19:25:59.000000000 +0100
@@ -0,0 +1,4 @@
+Support-em-code-tags.patch
+Properly-initialize-AsNodeToXmlHelper.patch
+trivial-Turn-is_-em-code-_text-fields-into-bitfields.patch
+Improve-handling-of-em-and-code-tags.patch
diff -Nru appstream-glib-0.7.18/debian/patches/Support-em-code-tags.patch appstream-glib-0.7.18/debian/patches/Support-em-code-tags.patch
--- appstream-glib-0.7.18/debian/patches/Support-em-code-tags.patch	1970-01-01 01:00:00.000000000 +0100
+++ appstream-glib-0.7.18/debian/patches/Support-em-code-tags.patch	2023-06-07 19:25:59.000000000 +0100
@@ -0,0 +1,118 @@
+From: =?utf-8?q?Philip_M=C3=BCller?= <ph...@manjaro.org>
+Date: Mon, 14 Jun 2021 22:06:01 +0200
+Subject: Support em/code tags
+
+Some appstream-data packages add <em> and </em> or <code> and </code>
+to the files. Not all package manager can handle that. An example would
+be pamac from Manjaro
+
+Origin: https://github.com/hughsie/appstream-glib/pull/403
+Applied-upstream: 0.8.0, commit:f939f14774618fd07d7019e9d0c86e1e1ae5642a
+Bug-Debian: https://bugs.debian.org/1037206
+---
+ libappstream-glib/as-node.c      | 36 ++++++++++++++++++++++++++++++++++++
+ libappstream-glib/as-self-test.c | 16 ++++++++++++++++
+ 2 files changed, 52 insertions(+)
+
+diff --git a/libappstream-glib/as-node.c b/libappstream-glib/as-node.c
+index aafb16a..b4159ea 100644
+--- a/libappstream-glib/as-node.c
++++ b/libappstream-glib/as-node.c
+@@ -555,6 +555,8 @@ typedef struct {
+ 	AsNode			*current;
+ 	AsNodeFromXmlFlags	 flags;
+ 	const gchar * const	*locales;
++	guint8			 is_em_text;
++	guint8			 is_code_text;
+ } AsNodeToXmlHelper;
+ 
+ /**
+@@ -604,6 +606,16 @@ as_node_start_element_cb (GMarkupParseContext *context,
+ 	AsNode *current;
+ 	guint i;
+ 
++	/* do not create a child node for em and code tags */
++	if (g_strcmp0 (element_name, "em") == 0) {
++		helper->is_em_text = 1;
++		return;
++	}
++	if (g_strcmp0 (element_name, "code") == 0) {
++		helper->is_code_text = 1;
++		return;
++	}
++
+ 	/* check if we should ignore the locale */
+ 	data = g_slice_new0 (AsNodeData);
+ 
+@@ -662,6 +674,16 @@ as_node_end_element_cb (GMarkupParseContext *context,
+ 			GError             **error)
+ {
+ 	AsNodeToXmlHelper *helper = (AsNodeToXmlHelper *) user_data;
++
++	/* do not create a child node for em and code tags */
++	if (g_strcmp0 (element_name, "em") == 0) {
++		helper->is_em_text = 0;
++		return;
++	}
++	if (g_strcmp0 (element_name, "code") == 0) {
++		helper->is_code_text = 0;
++		return;
++	}
+ 	helper->current = helper->current->parent;
+ }
+ 
+@@ -695,6 +717,20 @@ as_node_text_cb (GMarkupParseContext *context,
+ 
+ 	/* split up into lines and add each with spaces stripped */
+ 	if (data->cdata != NULL) {
++		/* support em and code tags */
++		if (g_strcmp0 (as_tag_data_get_name (data), "p") == 0 ||
++			g_strcmp0 (as_tag_data_get_name (data), "li") == 0) {
++			g_autoptr(GString) str = g_string_new (data->cdata);
++			as_ref_string_unref (data->cdata);
++			if (helper->is_em_text)
++				g_string_append_printf (str, "<em>%s</em>", text);
++			else if (helper->is_code_text)
++				g_string_append_printf (str, "<code>%s</code>", text);
++			else
++				g_string_append (str, text);
++			data->cdata = as_ref_string_new_with_length (str->str, str->len);
++			return;
++		}
+ 		g_set_error (error,
+ 			     AS_NODE_ERROR,
+ 			     AS_NODE_ERROR_INVALID_MARKUP,
+diff --git a/libappstream-glib/as-self-test.c b/libappstream-glib/as-self-test.c
+index 78af947..3886e4b 100644
+--- a/libappstream-glib/as-self-test.c
++++ b/libappstream-glib/as-self-test.c
+@@ -2861,6 +2861,11 @@ as_test_node_xml_func (void)
+ 			     "<!-- this documents bar -->"
+ 			     "<bar key=\"value\">baz</bar>"
+ 			     "</foo>";
++	const gchar *valid_em_code = "<description>"
++			     "<p>"
++			     "It now also supports <em>em</em> and <code>code</code> tags."
++			     "</p>"
++			     "</description>";
+ 	GError *error = NULL;
+ 	AsNode *n2;
+ 	AsNode *root;
+@@ -2924,6 +2929,17 @@ as_test_node_xml_func (void)
+ 	g_string_free (xml, TRUE);
+ 	as_node_unref (root);
+ 
++	/* support em and code tags */
++	root = as_node_from_xml (valid_em_code, 0, &error);
++	g_assert_no_error (error);
++	g_assert (root != NULL);
++
++	n2 = as_node_find (root, "description/p");
++	g_assert (n2 != NULL);
++	printf ("<%s>\n", as_node_get_data (n2));
++	g_assert_cmpstr (as_node_get_data (n2), ==, "It now also supports<em>em</em> and <code>code</code> tags.");
++	as_node_unref (root);
++
+ 	/* keep comments */
+ 	root = as_node_from_xml (valid,
+ 				 AS_NODE_FROM_XML_FLAG_KEEP_COMMENTS,
diff -Nru appstream-glib-0.7.18/debian/patches/trivial-Turn-is_-em-code-_text-fields-into-bitfields.patch appstream-glib-0.7.18/debian/patches/trivial-Turn-is_-em-code-_text-fields-into-bitfields.patch
--- appstream-glib-0.7.18/debian/patches/trivial-Turn-is_-em-code-_text-fields-into-bitfields.patch	1970-01-01 01:00:00.000000000 +0100
+++ appstream-glib-0.7.18/debian/patches/trivial-Turn-is_-em-code-_text-fields-into-bitfields.patch	2023-06-07 19:25:59.000000000 +0100
@@ -0,0 +1,26 @@
+From: "Jan Alexander Steffens (heftig)" <jan.steff...@gmail.com>
+Date: Fri, 15 Jul 2022 20:34:59 +0200
+Subject: trivial: Turn is_{em,code}_text fields into bitfields
+
+Origin: https://github.com/hughsie/appstream-glib/pull/446
+Applied-upstream: 0.8.1, commit:8d39640032752bf81d648d018ff115aa8d495957
+Bug-Debian: https://bugs.debian.org/1037206
+---
+ libappstream-glib/as-node.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/libappstream-glib/as-node.c b/libappstream-glib/as-node.c
+index c5eb8a1..5e19337 100644
+--- a/libappstream-glib/as-node.c
++++ b/libappstream-glib/as-node.c
+@@ -555,8 +555,8 @@ typedef struct {
+ 	AsNode			*current;
+ 	AsNodeFromXmlFlags	 flags;
+ 	const gchar * const	*locales;
+-	guint8			 is_em_text;
+-	guint8			 is_code_text;
++	guint8			 is_em_text:1;
++	guint8			 is_code_text:1;
+ } AsNodeToXmlHelper;
+ 
+ /**
diff -Nru appstream-glib-0.7.18/libappstream-glib/as-node.c appstream-glib-0.7.18/libappstream-glib/as-node.c
--- appstream-glib-0.7.18/libappstream-glib/as-node.c	2020-09-07 11:20:43.894573000 +0100
+++ appstream-glib-0.7.18/libappstream-glib/as-node.c	2023-06-07 20:59:06.000000000 +0100
@@ -555,6 +555,8 @@
 	AsNode			*current;
 	AsNodeFromXmlFlags	 flags;
 	const gchar * const	*locales;
+	guint8			 is_em_text:1;
+	guint8			 is_code_text:1;
 } AsNodeToXmlHelper;
 
 /**
@@ -604,6 +606,16 @@
 	AsNode *current;
 	guint i;
 
+	/* do not create a child node for em and code tags */
+	if (g_strcmp0 (element_name, "em") == 0) {
+		helper->is_em_text = 1;
+		return;
+	}
+	if (g_strcmp0 (element_name, "code") == 0) {
+		helper->is_code_text = 1;
+		return;
+	}
+
 	/* check if we should ignore the locale */
 	data = g_slice_new0 (AsNodeData);
 
@@ -662,6 +674,53 @@
 			GError             **error)
 {
 	AsNodeToXmlHelper *helper = (AsNodeToXmlHelper *) user_data;
+	AsNodeData *data = helper->current->data;
+
+	/* do not create a child node for em and code tags */
+	if (g_strcmp0 (element_name, "em") == 0) {
+		helper->is_em_text = 0;
+		return;
+	}
+	if (g_strcmp0 (element_name, "code") == 0) {
+		helper->is_code_text = 0;
+		return;
+	}
+
+	if (data->cdata != NULL) {
+		/* split up into lines and add each with spaces stripped */
+		if ((helper->flags & AS_NODE_FROM_XML_FLAG_LITERAL_TEXT) == 0) {
+			AsRefString *cdata = data->cdata;
+			data->cdata = as_node_reflow_text (cdata, strlen (cdata));
+			as_ref_string_unref (cdata);
+		}
+
+		/* intern commonly duplicated tag values and save a bit of memory */
+		if (data->is_tag_valid) {
+			AsNode *root = g_node_get_root (helper->current);
+			switch (data->tag) {
+			case AS_TAG_CATEGORY:
+			case AS_TAG_COMPULSORY_FOR_DESKTOP:
+			case AS_TAG_CONTENT_ATTRIBUTE:
+			case AS_TAG_DEVELOPER_NAME:
+			case AS_TAG_EXTENDS:
+			case AS_TAG_ICON:
+			case AS_TAG_ID:
+			case AS_TAG_KUDO:
+			case AS_TAG_LANG:
+			case AS_TAG_METADATA_LICENSE:
+			case AS_TAG_MIMETYPE:
+			case AS_TAG_PROJECT_GROUP:
+			case AS_TAG_PROJECT_LICENSE:
+			case AS_TAG_SOURCE_PKGNAME:
+			case AS_TAG_URL:
+				as_node_cdata_to_intern (root, data);
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
 	helper->current = helper->current->parent;
 }
 
@@ -693,8 +752,9 @@
 	if (i >= text_len)
 		return;
 
-	/* split up into lines and add each with spaces stripped */
-	if (data->cdata != NULL) {
+	if (data->cdata != NULL &&
+	    g_strcmp0 (as_tag_data_get_name (data), "p") != 0 &&
+	    g_strcmp0 (as_tag_data_get_name (data), "li") != 0) {
 		g_set_error (error,
 			     AS_NODE_ERROR,
 			     AS_NODE_ERROR_INVALID_MARKUP,
@@ -703,37 +763,33 @@
 			     data->cdata, text);
 		return;
 	}
-	if ((helper->flags & AS_NODE_FROM_XML_FLAG_LITERAL_TEXT) > 0) {
-		data->cdata = as_ref_string_new_with_length (text, text_len + 1);
-	} else {
-		data->cdata = as_node_reflow_text (text, (gssize) text_len);
-	}
 
-	/* intern commonly duplicated tag values and save a bit of memory */
-	if (data->is_tag_valid && data->cdata != NULL) {
-		AsNode *root = g_node_get_root (helper->current);
-		switch (data->tag) {
-		case AS_TAG_CATEGORY:
-		case AS_TAG_COMPULSORY_FOR_DESKTOP:
-		case AS_TAG_CONTENT_ATTRIBUTE:
-		case AS_TAG_DEVELOPER_NAME:
-		case AS_TAG_EXTENDS:
-		case AS_TAG_ICON:
-		case AS_TAG_ID:
-		case AS_TAG_KUDO:
-		case AS_TAG_LANG:
-		case AS_TAG_METADATA_LICENSE:
-		case AS_TAG_MIMETYPE:
-		case AS_TAG_PROJECT_GROUP:
-		case AS_TAG_PROJECT_LICENSE:
-		case AS_TAG_SOURCE_PKGNAME:
-		case AS_TAG_URL:
-			as_node_cdata_to_intern (root, data);
-			break;
-		default:
-			break;
+	/* support em and code tags */
+	if (helper->is_em_text || helper->is_code_text || data->cdata != NULL) {
+		g_autoptr(GString) str = g_string_new (NULL);
+
+		if (data->cdata != NULL) {
+			g_string_append (str, data->cdata);
+			as_ref_string_unref (data->cdata);
 		}
+
+		if (helper->is_em_text)
+			g_string_append (str, "<em>");
+		if (helper->is_code_text)
+			g_string_append (str, "<code>");
+
+		g_string_append_len (str, text, text_len);
+
+		if (helper->is_code_text)
+			g_string_append (str, "</code>");
+		if (helper->is_em_text)
+			g_string_append (str, "</em>");
+
+		data->cdata = as_ref_string_new_with_length (str->str, str->len);
+		return;
 	}
+
+	data->cdata = as_ref_string_new_with_length (text, text_len);
 }
 
 static void
@@ -790,7 +846,7 @@
 			   AsNodeFromXmlFlags flags,
 			   GError **error)
 {
-	AsNodeToXmlHelper helper;
+	AsNodeToXmlHelper helper = {0};
 	AsNode *root = NULL;
 	gboolean ret;
 	g_autoptr(GError) error_local = NULL;
@@ -927,7 +983,7 @@
 		   GCancellable *cancellable,
 		   GError **error)
 {
-	AsNodeToXmlHelper helper;
+	AsNodeToXmlHelper helper = {0};
 	GError *error_local = NULL;
 	AsNode *root = NULL;
 	const gchar *content_type = NULL;
diff -Nru appstream-glib-0.7.18/libappstream-glib/as-self-test.c appstream-glib-0.7.18/libappstream-glib/as-self-test.c
--- appstream-glib-0.7.18/libappstream-glib/as-self-test.c	2020-09-07 11:20:43.896573000 +0100
+++ appstream-glib-0.7.18/libappstream-glib/as-self-test.c	2023-06-07 20:59:06.000000000 +0100
@@ -2861,6 +2861,20 @@
 			     "<!-- this documents bar -->"
 			     "<bar key=\"value\">baz</bar>"
 			     "</foo>";
+	const gchar *valid_em_code = "<description>"
+			     "<p>"
+			     "It now also supports <em>em</em> and <code>code</code> tags."
+			     "</p>"
+			     "</description>";
+	const gchar *valid_em_code_2 = "<description>"
+			     "<p><em>Emphasis</em> at the start of the paragraph</p>"
+			     "</description>";
+	const gchar *valid_em_code_empty = "<description>"
+			     "<p><em></em></p>"
+			     "</description>";
+	const gchar *valid_em_code_empty_2 = "<description>"
+			     "<p>empty <em></em> emphasis</p>"
+			     "</description>";
 	GError *error = NULL;
 	AsNode *n2;
 	AsNode *root;
@@ -2924,6 +2938,43 @@
 	g_string_free (xml, TRUE);
 	as_node_unref (root);
 
+	/* support em and code tags */
+	root = as_node_from_xml (valid_em_code, 0, &error);
+	g_assert_no_error (error);
+	g_assert (root != NULL);
+
+	n2 = as_node_find (root, "description/p");
+	g_assert (n2 != NULL);
+	g_assert_cmpstr (as_node_get_data (n2), ==, "It now also supports <em>em</em> and <code>code</code> tags.");
+	as_node_unref (root);
+
+	root = as_node_from_xml (valid_em_code_2, 0, &error);
+	g_assert_no_error (error);
+	g_assert (root != NULL);
+
+	n2 = as_node_find (root, "description/p");
+	g_assert (n2 != NULL);
+	g_assert_cmpstr (as_node_get_data (n2), ==, "<em>Emphasis</em> at the start of the paragraph");
+	as_node_unref (root);
+
+	root = as_node_from_xml (valid_em_code_empty, 0, &error);
+	g_assert_no_error (error);
+	g_assert (root != NULL);
+
+	n2 = as_node_find (root, "description/p");
+	g_assert (n2 != NULL);
+	g_assert_cmpstr (as_node_get_data (n2), ==, NULL);
+	as_node_unref (root);
+
+	root = as_node_from_xml (valid_em_code_empty_2, 0, &error);
+	g_assert_no_error (error);
+	g_assert (root != NULL);
+
+	n2 = as_node_find (root, "description/p");
+	g_assert (n2 != NULL);
+	g_assert_cmpstr (as_node_get_data (n2), ==, "empty  emphasis");
+	as_node_unref (root);
+
 	/* keep comments */
 	root = as_node_from_xml (valid,
 				 AS_NODE_FROM_XML_FLAG_KEEP_COMMENTS,

Reply via email to