From 2fa32c724fe1c6f6ebe4225fc49cc5a3e05eab30 Mon Sep 17 00:00:00 2001
From: Shuo Shen <shuoshen178@gmail.com>
Date: Fri, 9 May 2025 11:06:35 -0700
Subject: [PATCH] lisp/org.el: refactor the org-tag family regex to prevent
 duplicates

* org.el: This is a refactoring patch with no intended functional
changes.  The goal of the patch is to remove duplication in the tag
regexps so that it will be easier to allow new character sets for
tags in the future.  The commit adds `org-tag-valid-char-set`,
`org-tag-invalid-char-re`, `org-tag-group-enclosed-re`, and
`org-tag-group-optional-re` to be used instead of inline regexps.  It
refactors `org-tag-re`, `org-tag-group-re`, and `org-tag-line-re` to
build upon the other, smaller regexps.

* ol-bibtex.el, org-element.el: Replace the inline regexps with the
predefined constants.
---
 lisp/ol-bibtex.el   |  3 ++-
 lisp/org-element.el |  2 +-
 lisp/org.el         | 42 ++++++++++++++++++++++++++++++++----------
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/lisp/ol-bibtex.el b/lisp/ol-bibtex.el
index 37b1cd394..4851bfc9d 100644
--- a/lisp/ol-bibtex.el
+++ b/lisp/ol-bibtex.el
@@ -123,6 +123,7 @@
 (defvar org-property-end-re)
 (defvar org-special-properties)
 (defvar org-window-config-before-follow-link)
+(defvar org-tag-invalid-char-re)
 
 (declare-function bibtex-beginning-of-entry "bibtex" ())
 (declare-function bibtex-generate-autokey "bibtex" ())
@@ -754,7 +755,7 @@ entry at point."
 			 (funcall
 			  togtag
 			  (replace-regexp-in-string
-			   "[^[:alnum:]_@#%]" ""
+			   org-tag-invalid-char-re ""
 			   (replace-regexp-in-string "[ \t]+" "_" kw))))
 		     (org-bibtex-put (car pair) (cdr pair) insert-raw)))
 	(_ (org-bibtex-put (car pair) (cdr pair) insert-raw))))
diff --git a/lisp/org-element.el b/lisp/org-element.el
index 56c03a0aa..557a639b5 100644
--- a/lisp/org-element.el
+++ b/lisp/org-element.el
@@ -1348,7 +1348,7 @@ Throw `:org-element-deferred-retry' signal at the end."
                      (skip-chars-forward " \t"))))
 	     (title-start (point))
 	     (tags (when (re-search-forward
-			  "\\(:[[:alnum:]_@#%:]+:\\)[ \t]*$"
+                          (format "\\(:[%s:]+:\\)[ \t]*$" org-tag-valid-char-set)
 			  (line-end-position)
 			  'move)
 		     (goto-char (match-beginning 0))
diff --git a/lisp/org.el b/lisp/org.el
index dfcbe84ff..2d7b58f5d 100644
--- a/lisp/org.el
+++ b/lisp/org.el
@@ -665,16 +665,41 @@ but the stars and the body are.")
 An archived subtree does not open during visibility cycling, and does
 not contribute to the agenda listings.")
 
-(defconst org-tag-re "[[:alnum:]_@#%]+"
+(defconst org-tag-valid-char-set "[:alnum:]_@#%"
+  "Characters and character classes that are valid within a tag.
+This is the inside of a bracket expression, without the enclosing
+brackets.  It is the base pattern for the tag matching regexps.")
+
+(defconst org-tag-invalid-char-re
+  (concat "[^" org-tag-valid-char-set "]")
+  "Regexp matching a single character that is not valid in a tag.")
+
+(defconst org-tag-re (concat "[" org-tag-valid-char-set "]+")
   "Regexp matching a single tag.")
 
-(defconst org-tag-group-re "[ \t]+\\(:\\([[:alnum:]_@#%:]+\\):\\)[ \t]*$"
+(defconst org-tag-group-enclosed-re
+  (format "\\(:\\([%s:]+\\):\\)" org-tag-valid-char-set)
+  "Regexp matching a colon-enclosed group of tags, e.g., \":TAG1:TAG2:\".
+The enclosing spaces and tabs are not matched.  Match group 1
+stores the tags with the enclosing colons, and match group 2
+stores the tags without the enclosing colons.  Built using
+`org-tag-valid-char-set' with the addition of the colon.")
+
+(defconst org-tag-group-optional-re
+  (concat "\\(?:[ \t]+" org-tag-group-enclosed-re "\\)?[ \t]*$")
+  "Regexp matching an optional tag group at the end of a line.
+Surrounding spaces and tabs are matched too.  If a tag group is
+present, group 1 is the tag group with its enclosing colons and
+group 2 is the tags alone.  Double any \"%\" before use in `format'.")
+
+(defconst org-tag-group-re
+  (concat "[ \t]+" org-tag-group-enclosed-re "[ \t]*$")
   "Regexp matching the tag group at the end of a line, with leading spaces.
 Tags are stored in match group 1.  Match group 2 stores the tags
 without the enclosing colons.")
 
 (defconst org-tag-line-re
-  "^\\*+ \\(?:.*[ \t]\\)?\\(:\\([[:alnum:]_@#%:]+\\):\\)[ \t]*$"
+  (concat "^\\*+ \\(?:.*[ \t]\\)?" org-tag-group-enclosed-re "[ \t]*$")
   "Regexp matching tags in a headline.
 Tags are stored in match group 1.  Match group 2 stores the tags
 without the enclosing colons.")
@@ -4522,8 +4547,7 @@ related expressions."
 		      "\\(?: +" org-todo-regexp "\\)?"
 		      "\\(?: +\\(\\[#.\\]\\)\\)?"
 		      "\\(?: +\\(.*?\\)\\)??"
-		      "\\(?:[ \t]+\\(:[[:alnum:]_@#%:]+:\\)\\)?"
-		      "[ \t]*$")
+                      org-tag-group-optional-re)
 	      org-complex-heading-regexp-format
 	      (concat "^\\(\\*+\\)"
 		      "\\(?: +" org-todo-regexp "\\)?"
@@ -4536,14 +4560,12 @@ related expressions."
 		      "\\(%s\\)"
 		      "\\(?: *\\[[0-9%%/]+\\]\\)*"
 		      "\\)"
-		      "\\(?:[ \t]+\\(:[[:alnum:]_@#%%:]+:\\)\\)?"
-		      "[ \t]*$")
+		      (replace-regexp-in-string "%" "%%" org-tag-group-optional-re t t))
 	      org-todo-line-tags-regexp
 	      (concat "^\\(\\*+\\)"
 		      "\\(?: +" org-todo-regexp "\\)?"
 		      "\\(?: +\\(.*?\\)\\)??"
-		      "\\(?:[ \t]+\\(:[[:alnum:]:_@#%]+:\\)\\)?"
-		      "[ \t]*$"))
+                      org-tag-group-optional-re))
 	(org-compute-latex-and-related-regexp)))))
 
 (defun org-collect-keywords (keywords &optional unique directory)
@@ -12047,7 +12069,7 @@ in Lisp code use `org-set-tags' instead."
 	       (tags
 		(replace-regexp-in-string
 		 ;; Ignore all forbidden characters in tags.
-		 "[^[:alnum:]_@#%]+" ":"
+                 (concat org-tag-invalid-char-re "+") ":"
 		 (if (or (eq t org-use-fast-tag-selection)
 			 (and org-use-fast-tag-selection
 			      (delq nil (mapcar #'cdr table))))
-- 
2.39.5 (Apple Git-154)

