branch: externals/ellama
commit d3ede72b263379dcd80cdc99b1de6c4ebf1d2a69
Merge: 56bb486665 ac3c2f7b71
Author: Sergey Kostyaev <[email protected]>
Commit: GitHub <[email protected]>

    Merge pull request #386 from s-kostyaev/codex/fix-regexp-stack-overflow
    
    Fix regexp stack overflow in markdown code fence conversion
---
 NEWS.org             |  6 ++++++
 ellama.el            | 38 ++++++++++++++++++++++++++++++++++----
 tests/test-ellama.el |  9 +++++++++
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/NEWS.org b/NEWS.org
index ba38d92e30..6b80c149a0 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,3 +1,9 @@
+* Version 1.12.10
+- Fix stack overflow in markdown code fence conversion in
+  ~ellama--translate-markdown-to-org-filter~ by replacing expensive regular
+  expressions with linear line parsing for inline fences.
+- Add regression test ~test-ellama-md-to-org-inline-fence-long-line~ to cover
+  long lines with inline code fences and prevent this failure from returning.
 * Version 1.12.9
 - Fix result delivery for already approved async tools. Ensure results are
   delivered via callback function for both interactive and pre‑approved tools.
diff --git a/ellama.el b/ellama.el
index 750ab48ecc..78d2f2bd34 100644
--- a/ellama.el
+++ b/ellama.el
@@ -6,7 +6,7 @@
 ;; URL: http://github.com/s-kostyaev/ellama
 ;; Keywords: help local tools
 ;; Package-Requires: ((emacs "28.1") (llm "0.24.0") (plz "0.8") (transient "0.7") (compat "29.1") (yaml "1.2.3"))
-;; Version: 1.12.9
+;; Version: 1.12.10
 ;; SPDX-License-Identifier: GPL-3.0-or-later
 ;; Created: 8th Oct 2023
 
@@ -530,9 +530,40 @@ It should be a function with single argument generated text string."
 (defun ellama--replace-first-begin-src (text)
   "Replace first begin src in TEXT."
   (if (not (string-match-p (rx (literal "#+BEGIN_SRC")) text))
-      (replace-regexp-in-string "^[[:space:]]*```\\(\\(.\\|\n\\)*\\)" 
"#+BEGIN_SRC\\1" text)
+      (with-temp-buffer                ; work on a scratch copy of TEXT
+        (insert text)
+        (goto-char (point-min))
+        (when (re-search-forward "^[[:space:]]*```" nil t) ; first fence only, if any
+          (replace-match "#+BEGIN_SRC" t t)) ; FIXEDCASE and LITERAL are both t
+        (buffer-substring-no-properties (point-min) (point-max))) ; edited copy
     text))
 
+(defun ellama--replace-inline-code-fences (text)
+  "Replace inline markdown code fences in TEXT with org equivalents."
+  (with-temp-buffer
+    (insert text)
+    (goto-char (point-min))
+    (while (not (eobp))                ; visit each line of TEXT in turn
+      (let* ((line-beg (line-beginning-position))
+             (line-end (line-end-position))
+             (line (buffer-substring-no-properties line-beg line-end))
+             (fence-pos (string-match "```" line))) ; first fence on the line, or nil
+        ;; Only a fence that is not at the very start of its line is rewritten.
+        (when (and fence-pos (> fence-pos 0))
+          (let ((prefix (substring line 0 fence-pos)) ; text before the fence
+                (suffix (substring line (+ fence-pos 3)))) ; text after the 3 backticks
+            (cond
+             ((string-match-p "\\`[A-Za-z0-9-]+\\'" suffix) ; suffix is one bare word
+              (goto-char line-beg)
+              (delete-region line-beg line-end)
+              (insert prefix "\n#+BEGIN_SRC " suffix)) ; fence opens a block
+             ((not (string= suffix "")) ; empty suffix: line is left unchanged
+              (goto-char line-beg)
+              (delete-region line-beg line-end)
+              (insert prefix "\n#+END_SRC\n" suffix)))))) ; fence closes a block
+      (forward-line 1))                ; point is on the last rewritten line here
+    (buffer-substring-no-properties (point-min) (point-max))))
+
 (defun ellama--replace-bad-code-blocks (text)
   "Replace code src blocks in TEXT."
   (with-temp-buffer
@@ -655,8 +686,7 @@ This filter contains only subset of markdown syntax to be good enough."
     text
     ;; code blocks
     (replace-regexp-in-string "^[[:space:]]*```\\(.+\\)$" "#+BEGIN_SRC \\1")
-    (replace-regexp-in-string "^\\(.+\\)```\\([A-Za-z0-9\\-]+\\)$" "\\1\n#+BEGIN_SRC \\2")
-    (replace-regexp-in-string "^\\(.+\\)```\\(.+\\)$" "\\1\n#+END_SRC\n\\2")
+    (ellama--replace-inline-code-fences)
     (ellama--replace-first-begin-src)
     (replace-regexp-in-string "^<!-- language: \\(.+\\) -->\n```" "#+BEGIN_SRC \\1")
     (replace-regexp-in-string "^[[:space:]]*```$" "#+END_SRC")
diff --git a/tests/test-ellama.el b/tests/test-ellama.el
index e267a9fa5e..b81c5f5ba4 100644
--- a/tests/test-ellama.el
+++ b/tests/test-ellama.el
@@ -833,6 +833,15 @@ That's it."))))
 (message \"ok\")
 #+END_SRC"))))
 
+(ert-deftest test-ellama-md-to-org-inline-fence-long-line ()
+  (let* ((long-part (make-string 150000 ?a)) ; 150000 characters, no newline
+         (text (concat "<think>\n" long-part "```text\nbody\n```\n</think>")) ; fence ends the long line
+         (result (ellama--translate-markdown-to-org-filter text)))
+    (should (string-match-p "#\\+BEGIN_QUOTE" result))
+    (should (string-match-p "#\\+BEGIN_SRC text" result)) ; inline fence became a src block
+    (should (string-match-p "#\\+END_SRC" result))
+    (should (string-match-p "#\\+END_QUOTE" result))))
+
 (ert-deftest test-ellama-md-to-org-code-inline-latex ()
   (let ((result (ellama--translate-markdown-to-org-filter "_some italic_
 $$P_\\theta(Y_T, ..., Y_2|Y_1, x_1, ..., x_T)$$

Reply via email to