branch: externals/ellama
commit d3ede72b263379dcd80cdc99b1de6c4ebf1d2a69
Merge: 56bb486665 ac3c2f7b71
Author: Sergey Kostyaev <[email protected]>
Commit: GitHub <[email protected]>
Merge pull request #386 from s-kostyaev/codex/fix-regexp-stack-overflow
Fix regexp stack overflow in markdown code fence conversion
---
NEWS.org | 6 ++++++
ellama.el | 38 ++++++++++++++++++++++++++++++++++----
tests/test-ellama.el | 9 +++++++++
3 files changed, 49 insertions(+), 4 deletions(-)
diff --git a/NEWS.org b/NEWS.org
index ba38d92e30..6b80c149a0 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,3 +1,9 @@
+* Version 1.12.10
+- Fix stack overflow in markdown code fence conversion in
+ ~ellama--translate-markdown-to-org-filter~ by replacing expensive regular
+ expressions with linear line parsing for inline fences.
+- Add regression test ~test-ellama-md-to-org-inline-fence-long-line~ to cover
+ long lines with inline code fences and prevent this failure from returning.
* Version 1.12.9
- Fix result delivery for already approved async tools. Ensure results are
delivered via callback function for both interactive and pre‑approved tools.
diff --git a/ellama.el b/ellama.el
index 750ab48ecc..78d2f2bd34 100644
--- a/ellama.el
+++ b/ellama.el
@@ -6,7 +6,7 @@
;; URL: http://github.com/s-kostyaev/ellama
;; Keywords: help local tools
;; Package-Requires: ((emacs "28.1") (llm "0.24.0") (plz "0.8") (transient
"0.7") (compat "29.1") (yaml "1.2.3"))
-;; Version: 1.12.9
+;; Version: 1.12.10
;; SPDX-License-Identifier: GPL-3.0-or-later
;; Created: 8th Oct 2023
@@ -530,9 +530,40 @@ It should be a function with single argument generated
text string."
(defun ellama--replace-first-begin-src (text)
"Replace first begin src in TEXT."
(if (not (string-match-p (rx (literal "#+BEGIN_SRC")) text))
- (replace-regexp-in-string "^[[:space:]]*```\\(\\(.\\|\n\\)*\\)"
"#+BEGIN_SRC\\1" text)
+ ;; TEXT has no "#+BEGIN_SRC" yet: rewrite the first fence that starts
+ ;; a line (optionally after whitespace) into one.  Only the fence
+ ;; itself is matched and replaced, so the rest of TEXT is never
+ ;; captured by the regexp, unlike the old `\\(.\\|\n\\)*' group.
+ (with-temp-buffer
+ (insert text)
+ (goto-char (point-min))
+ (when (re-search-forward "^[[:space:]]*```" nil t)
+ ;; FIXEDCASE and LITERAL are both t: insert the replacement verbatim.
+ (replace-match "#+BEGIN_SRC" t t))
+ (buffer-substring-no-properties (point-min) (point-max)))
text))
+(defun ellama--replace-inline-code-fences (text)
+ "Replace inline markdown code fences in TEXT with org equivalents.
+A fence is inline when other text precedes it on the same line.
+Return the converted text as a new string; TEXT is not modified."
+ (with-temp-buffer
+ (insert text)
+ (goto-char (point-min))
+ ;; Walk the buffer one line at a time; each line is searched once for
+ ;; a literal fence, so no regexp ever has to span a whole long line.
+ (while (not (eobp))
+ (let* ((line-beg (line-beginning-position))
+ (line-end (line-end-position))
+ (line (buffer-substring-no-properties line-beg line-end))
+ (fence-pos (string-match "```" line)))
+ ;; Only the first fence on the line is considered, and only when
+ ;; something precedes it (fence-pos > 0); that includes a fence
+ ;; preceded solely by whitespace.  Two shapes are rewritten:
+ ;; `text ```lang' (opens a block) and `text ```more text' (closes one).
+ ;; NOTE(review): the regexps this function replaces started with a
+ ;; greedy `.+', so they would presumably split at the last fence on
+ ;; a line rather than the first -- confirm the difference is
+ ;; intended for lines that contain several fences.
+ (when (and fence-pos (> fence-pos 0))
+ (let ((prefix (substring line 0 fence-pos))
+ (suffix (substring line (+ fence-pos 3))))
+ (cond
+ ;; The rest of the line is only letters, digits or hyphens:
+ ;; keep the prefix and start a source block tagged with it.
+ ((string-match-p "\\`[A-Za-z0-9-]+\\'" suffix)
+ (goto-char line-beg)
+ (delete-region line-beg line-end)
+ (insert prefix "\n#+BEGIN_SRC " suffix))
+ ;; Any other non-empty remainder: emit #+END_SRC between the
+ ;; prefix and the remainder.  An empty remainder (the fence ends
+ ;; the line) matches neither clause, so that line is left as is.
+ ((not (string= suffix ""))
+ (goto-char line-beg)
+ (delete-region line-beg line-end)
+ (insert prefix "\n#+END_SRC\n" suffix))))))
+ ;; After an insertion point is at the end of the inserted text, so
+ ;; this moves past the rewritten lines without scanning them again.
+ (forward-line 1))
+ (buffer-substring-no-properties (point-min) (point-max))))
+
(defun ellama--replace-bad-code-blocks (text)
"Replace code src blocks in TEXT."
(with-temp-buffer
@@ -655,8 +686,7 @@ This filter contains only subset of markdown syntax to be
good enough."
text
;; code blocks
(replace-regexp-in-string "^[[:space:]]*```\\(.+\\)$" "#+BEGIN_SRC \\1")
- (replace-regexp-in-string "^\\(.+\\)```\\([A-Za-z0-9\\-]+\\)$"
"\\1\n#+BEGIN_SRC \\2")
- (replace-regexp-in-string "^\\(.+\\)```\\(.+\\)$" "\\1\n#+END_SRC\n\\2")
+ (ellama--replace-inline-code-fences)
(ellama--replace-first-begin-src)
(replace-regexp-in-string "^<!-- language: \\(.+\\) -->\n```" "#+BEGIN_SRC
\\1")
(replace-regexp-in-string "^[[:space:]]*```$" "#+END_SRC")
diff --git a/tests/test-ellama.el b/tests/test-ellama.el
index e267a9fa5e..b81c5f5ba4 100644
--- a/tests/test-ellama.el
+++ b/tests/test-ellama.el
@@ -833,6 +833,15 @@ That's it."))))
(message \"ok\")
#+END_SRC"))))
+;; Regression test for the regexp stack overflow in markdown code fence
+;; conversion: a single 150000-character line that ends in an inline
+;; "```text" fence must be converted without signaling an error.
+(ert-deftest test-ellama-md-to-org-inline-fence-long-line ()
+ (let* ((long-part (make-string 150000 ?a))
+ (text (concat "<think>\n" long-part "```text\nbody\n```\n</think>"))
+ (result (ellama--translate-markdown-to-org-filter text)))
+ ;; The quote markers presumably come from the <think> wrapper; the
+ ;; source-block markers come from the inline fence and its closer.
+ (should (string-match-p "#\\+BEGIN_QUOTE" result))
+ (should (string-match-p "#\\+BEGIN_SRC text" result))
+ (should (string-match-p "#\\+END_SRC" result))
+ (should (string-match-p "#\\+END_QUOTE" result))))
+
(ert-deftest test-ellama-md-to-org-code-inline-latex ()
(let ((result (ellama--translate-markdown-to-org-filter "_some italic_
$$P_\\theta(Y_T, ..., Y_2|Y_1, x_1, ..., x_T)$$