Rudolf Adamkovič writes:
Also, perhaps just "&ndash;" instead of "&#x2013;" ; EN DASH?
This seems like a good idea. ox-html.el already uses a lot of named
character references. The attached patch changes all remaining numeric
references to named ones as well.
This patch is unrelated to my other patch series and based on main. It
conflicts with my patches though, so I'll rebase the v2 of my patches
depending on whether we want to land this or not.
~lukas
From 1de06f2899c25b20df88fa55e034858da1a871b8 Mon Sep 17 00:00:00 2001
From: Lukas Epple <em...@lukasepple.de>
Date: Mon, 21 Jul 2025 11:46:44 +0200
Subject: [PATCH] lisp/ox-html.el: Convert numeric to named character
references
* lisp/ox-html.el (org-html-special-string-regexps, org-html--tags,
org-html-format-headline-default-function, org-html-timestamp,
org-html-table-cell, org-html-verse-block): Replace numeric character
references with their named equivalent. This should make the code
clearer since the reader only needs to remember character names instead
of hexadecimal Unicode code points. The equivalency table can be
found in section 13.5 of the HTML standard:
https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references.
All used named character references were introduced in HTML 4.0 (or
earlier) which means that they should be supported in XHTML as well.
(org-html-checkbox-types): Change &#xa0; to &nbsp; as
elsewhere. Unfortunately, &#x2610; and &#x2611; don't have a
corresponding named character reference in the HTML standard, but it
should be clear enough what they are supposed to represent from the
context.
* testing/lisp/test-ox-html.el (ox-html/checkbox-ascii): Adjust to
use of &nbsp; in `org-html-checkbox-types'. Since &nbsp; isn't part
of (vanilla) XML, we need to use `libxml-parse-html-region' which
changes the output a little bit: The surrounding html and body elements
are retained (?) as well as the newlines between the li elements.
---
lisp/ox-html.el | 20 ++++++++++----------
testing/lisp/test-ox-html.el | 24 ++++++++++++++++--------
2 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/lisp/ox-html.el b/lisp/ox-html.el
index 1fee9f05d..f60bda02b 100644
--- a/lisp/ox-html.el
+++ b/lisp/ox-html.el
@@ -233,10 +233,10 @@ For blocks that should contain headlines, use the HTML_CONTAINER
property on the headline itself.")
(defconst org-html-special-string-regexps
- '(("\\\\-" . "&#x00ad;") ; shy
- ("---\\([^-]\\)" . "&#x2014;\\1") ; mdash
- ("--\\([^-]\\)" . "&#x2013;\\1") ; ndash
- ("\\.\\.\\." . "&#x2026;")) ; hellip
+ '(("\\\\-" . "&shy;")
+ ("---\\([^-]\\)" . "&mdash;\\1")
+ ("--\\([^-]\\)" . "&ndash;\\1")
+ ("\\.\\.\\." . "&hellip;"))
"Regular expressions for special string conversion.")
(defvar org-html--id-attr-prefix "ID-"
@@ -1130,7 +1130,7 @@ org-info.js for your website."
((on . "&#x2611;") (off . "&#x2610;") (trans . "&#x2610;")))
(ascii .
((on . "<code>[X]</code>")
- (off . "<code>[&#xa0;]</code>")
+ (off . "<code>[&nbsp;]</code>")
(trans . "<code>[-]</code>")))
(html .
((on . "<input type='checkbox' checked='checked' />")
@@ -2346,7 +2346,7 @@ INFO is a plist containing export options."
(concat (plist-get info :html-tag-class-prefix)
(org-html-fix-class-name tag))
tag))
- tags "&#xa0;"))))
+ tags "&nbsp;"))))
;;;; Src Code
@@ -2872,7 +2872,7 @@ description of TODO, PRIORITY, TEXT, TAGS, and INFO arguments."
(concat todo (and todo " ")
priority (and priority " ")
text
- (and tags "&#xa0;&#xa0;&#xa0;") tags)))
+ (and tags "&nbsp;&nbsp;&nbsp;") tags)))
(defun org-html--container (headline info)
"Return HTML container name for HEADLINE as a string.
@@ -3763,7 +3763,7 @@ channel."
" align=\"%s\"" " class=\"org-%s\"")
(org-export-table-cell-alignment table-cell info)))))
(when (or (not contents) (string= "" (org-trim contents)))
- (setq contents "&#xa0;"))
+ (setq contents "&nbsp;"))
(cond
((and (org-export-table-has-header-p table info)
(= 1 (org-export-table-row-group table-row info)))
@@ -3942,7 +3942,7 @@ information."
:post-blank 0))
(value (org-html-plain-text (org-timestamp-translate timestamp-no-blank) info)))
(format "<span class=\"timestamp-wrapper\"><span class=\"timestamp\">%s</span></span>"
- (replace-regexp-in-string "--" "&#x2013;" value))))
+ (replace-regexp-in-string "--" "&ndash;" value))))
;;;; Underline
@@ -3972,7 +3972,7 @@ contextual information."
(format "<p class=\"verse\">\n%s</p>"
;; Replace leading white spaces with non-breaking spaces.
(replace-regexp-in-string
- "^[ \t]+" (lambda (m) (org-html--make-string (length m) "&#xa0;"))
+ "^[ \t]+" (lambda (m) (org-html--make-string (length m) "&nbsp;"))
;; Replace each newline character with line break. Also
;; remove any trailing "br" close-tag so as to avoid
;; duplicates.
diff --git a/testing/lisp/test-ox-html.el b/testing/lisp/test-ox-html.el
index c02d47fea..75ba177b3 100644
--- a/testing/lisp/test-ox-html.el
+++ b/testing/lisp/test-ox-html.el
@@ -822,13 +822,21 @@ $x$"
(skip-unless (libxml-available-p))
(should
(equal
- `(ul ((class . "org-ul"))
- (li ((class . "off"))
- (code nil ,(format "[%c]" (char-from-name "NO-BREAK SPACE"))) " not yet")
- (li ((class . "on"))
- (code nil "[X]") " I am done")
- (li ((class . "trans"))
- (code nil "[-]") " unclear"))
+ `(html nil
+ (body nil
+ (ul ((class . "org-ul"))
+ (li ((class . "off"))
+ (code nil ,(format "[%c]" (char-from-name "NO-BREAK SPACE"))) " not yet")
+ "
+"
+ (li ((class . "on"))
+ (code nil "[X]") " I am done")
+ "
+"
+ (li ((class . "trans"))
+ (code nil "[-]") " unclear")
+ "
+")))
(org-test-with-temp-text "
- [ ] not yet
- [X] I am done
@@ -863,7 +871,7 @@ $x$"
(org-export-to-buffer 'html export-buffer
nil nil nil t '(:html-checkbox-type html))
(with-current-buffer export-buffer
- (libxml-parse-xml-region (point-min) (point-max))))))))
+ (libxml-parse-html-region (point-min) (point-max))))))))
(ert-deftest ox-html/checkbox-unicode ()
"Test HTML checkbox rendering"
--
2.50.0