branch: externals/matlab-mode
commit b60330dfa4862cae04fe16dfb3d44f93219c57b8
Author: John Ciolfi <[email protected]>
Commit: John Ciolfi <[email protected]>
matlab-ts-mode: doc handling corrupted content
---
contributing/treesit-mode-how-to.org | 86 ++++++++++++++++++++++++++++++++++++
tests/t-utils.el | 32 ++++++++------
2 files changed, 104 insertions(+), 14 deletions(-)
diff --git a/contributing/treesit-mode-how-to.org
b/contributing/treesit-mode-how-to.org
index 985cc4e6fa..6d2f42e4ca 100644
--- a/contributing/treesit-mode-how-to.org
+++ b/contributing/treesit-mode-how-to.org
@@ -1460,6 +1460,92 @@ there-end with mismatch true (t) because the string is
missing the starting quot
No buffer modifications
#+end_src
+* Setup: Handling Corrupted Content
+
+Corrupted content in LANGUAGE, NAME.LANG, files can crash Emacs when your
tree-sitter
+language shared library runs on the corrupted content. For example, try loading
a large
+binary file and running =M-x LANGUAGE-major-mode=. Since content should be utf-8, you
should add
+an encoding check such as the following to the start of your LANGUAGE-major-mode:
+
+#+begin_src emacs-lisp
+ (defun LANGUAGE-ts-mode--check-file-encoding ()
+ "Check file encoding.
+ Error is signaled if contents are corrupt because non-utf8 printable
+ content can crash Emacs via the LANGUAGE tree-sitter parser."
+
+ (let ((bad-char-point (save-excursion
+ (goto-char (point-min))
+ (when (re-search-forward "[^[:print:][:space:]]"
nil t)
+ (point)))))
+ (when bad-char-point
+ (fundamental-mode)
+ (goto-char bad-char-point)
+ (user-error "Buffer appears corrupt, non-printable utf8 character at
point %d: %c"
+ bad-char-point (char-before)))))
+
+
+ (define-derived-mode LANGUAGE-ts-mode prog-mode "LANGUAGE:ts"
+ "Documentation."
+
+ (LANGUAGE-ts-mode--check-file-encoding)
+
+ (when (treesit-ready-p 'LANGUAGE)
+ ;; <snip>
+ ))
+#+end_src
+
+** Test: Handling Corrupted Content
+
+Test setup:
+
+ #+begin_example
+ ./LANGUAGE-ts-mode.el
+ ./tests/test-LANGUAGE-ts-mode-file-encoding.el
+ ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME1.LANG
+ ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME1_expected.txt
+ ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME2.LANG
+ ./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME2_expected.txt
+ ....
+ #+end_example
+
+=./tests/test-LANGUAGE-ts-mode-file-encoding.el= contains:
+
+ #+begin_src emacs-lisp
+ (require 't-utils)
+ (require 'LANGUAGE-ts-mode)
+
+ (defvar test-LANGUAGE-ts-mode-file-encoding--file nil)
+
+ (defun test-LANGUAGE-ts-mode-file-encoding--file (lang-file)
+ "Test file-encoding on LANG-FILE."
+ (let ((test-LANGUAGE-ts-mode-file-encoding--file lang-file))
+ (ert-run-tests-interactively "test-LANGUAGE-ts-mode-file-encoding")))
+
+ (ert-deftest test-LANGUAGE-ts-mode-file-encoding ()
+ (let* ((test-name "test-LANGUAGE-ts-mode-file-encoding")
+ (lang-files (t-utils-get-files
+ test-name
+ (rx ".lang" eos)
+ nil
+ test-LANGUAGE-ts-mode-file-encoding--file)))
+ (t-utils-error-if-no-treesit-for 'LANGUAGE test-name)
+ (t-utils-test-file-encoding test-name lang-files
#'LANGUAGE-ts-mode)))
+ #+end_src
+
+Create =./tests/test-LANGUAGE-ts-mode-file-encoding-files/*.LANG= files
containing corrupted
+(non-utf-8) content. Also create at least one valid =*.LANG= file.
+
+Run the test:
+
+ : M-x ert RET test-LANGUAGE-ts-mode-file-encoding RET
+
+In the =ert= result buffer, you can type "m" at the point of the test (where
+the color marker is) to see messages that were displayed by your test.
+
+If the =./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME*_expected.txt~=
files look good,
+rename them to
=./tests/test-LANGUAGE-ts-mode-file-encoding-files/NAME*_expected.txt= (per the
+messages shown by ert).
+
* Final version
TODO
diff --git a/tests/t-utils.el b/tests/t-utils.el
index 1075f1bb30..cdcf0b10b2 100644
--- a/tests/t-utils.el
+++ b/tests/t-utils.el
@@ -47,13 +47,13 @@
;; after examining it, rename it to
;; ./tests/test-LANGUAGE-ts-mode-font-lock-files/font_lock_test1_expected.txt.
;;
-;; When you run ert interactively, you'll be presented with a *ert* buffer.
You can
-;; type "m" on the colored dots in the *ert* buffer to see the messages for
that ert test
+;; When you run ert interactively, you'll be presented with an ert result
buffer. You can
+;; type "m" on the colored dots in the ert result buffer to see the messages
for that ert test
;; and the messages contain the sub-tests from the test loop for that ert
test. This will bring
;; up an *ERT Messages* buffer. In this buffer, type
;; M-x compilation-minor-mode
-;; to view the and navigate errors. The default error viewing in the *ert*
buffer is a bit dense
-;; due to the looping nature of the t-utils tests.
+;; to view and navigate the errors. The default error viewing in the ert
result buffer is a bit
+;; dense due to the looping nature of the t-utils tests.
;;
;; To run your tests in a build system, use
;;
@@ -785,8 +785,9 @@ To loop over all NAME*.LANG font-lock test files,
interactively
\\[ert] RET test-LANGUAGE-ts-mode-font-lock RET
-In the *ert* buffer, you can type \"m\" at the point of the test (where
-the color marker is) to see messages that were displayed by your test.
+In the `ert' result buffer, you can type \"m\" at the point of the
+test (where the color marker is) to see messages that were displayed by
+your test.
To debug a specific font-lock test file
@@ -989,8 +990,9 @@ To loop over all NAME*.LANG indent test files, interactively
\\[ert] RET test-LANGUAGE-ts-mode-indent RET
-In the *ert* buffer, you can type \"m\" at the point of the test (where
-the color marker is) to see messages that were displayed by your test.
+In the `ert' result buffer, you can type \"m\" at the point of the
+test (where the color marker is) to see messages that were displayed by
+your test.
To debug a specific indent test file
@@ -1553,14 +1555,15 @@ Where ./tests/test-LANUGAGE-ts-mode-file-encoding.el
contains:
nil
test-LANGUAGE-ts-mode-file-encoding--file)))
(t-utils-error-if-no-treesit-for \\='LANGUAGE test-name)
- (t-utils-test-file-encoding test-name lang-files)))
+ (t-utils-test-file-encoding test-name lang-files #\\='LANGUAGE-ts-mode)))
To loop over all NAME*.LANG file-encoding test files, interactively
\\[ert] RET test-LANGUAGE-ts-mode-file-encoding RET
-In the *ert* buffer, you can type \"m\" at the point of the test (where
-the color marker is) to see messages that were displayed by your test.
+In the `ert' result buffer, you can type \"m\" at the point of the
+test (where the color marker is) to see messages that were displayed by
+your test.
To debug a specific file-encoding test file
@@ -1583,13 +1586,14 @@ To debug a specific file-encoding test file
(got "Major mode activated succesfully.")
(got-file (concat expected-file "~")))
- (t-utils--insert-file-for-test lang-file file-major-mode)
-
+ ;; Load lang-file in temp buffer and activate file-major-mode
(condition-case err
- (t-utils--insert-file-for-test lang-file)
+ (t-utils--insert-file-for-test lang-file file-major-mode)
(error
(setq got (concat "Major mode errored with message\n"
(error-message-string err)))))
+ (setq got (concat got "\n\n" "Entered major-mode: " (symbol-name
major-mode) "\n"))
+
(kill-buffer)
(let ((error-msg (t-utils--baseline-check