branch: externals/matlab-mode
commit 1b9fcd2b9de7267d9e792651b2d9a15d81fa7e41
Author: John Ciolfi <[email protected]>
Commit: John Ciolfi <[email protected]>
test: add tests/sweep-test-matlab-ts-grammar.el
---
tests/sweep-test-matlab-ts-grammar.el | 160 ++++++++++++++++++
tests/sweep-test-matlab-ts-grammar.sh | 23 +++
tests/sweep-test-matlab-ts-mode-indent.el | 7 +-
tests/t-utils.el | 260 +++++++++++++++++++++++++++---
4 files changed, 424 insertions(+), 26 deletions(-)
diff --git a/tests/sweep-test-matlab-ts-grammar.el
b/tests/sweep-test-matlab-ts-grammar.el
new file mode 100644
index 0000000000..3b19d28b13
--- /dev/null
+++ b/tests/sweep-test-matlab-ts-grammar.el
@@ -0,0 +1,160 @@
+;;; sweep-test-matlab-ts-grammar.el --- -*- lexical-binding: t -*-
+;;
+;; Copyright 2025 Free Software Foundation, Inc.
+;;
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING. If not, write to
+;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+;;
+
+;;; Commentary:
+;;
+;; M-: (sweep-test-matlab-ts-grammar) - Look for bad matlab tree-sitter parses
+;; on *.m files in current directory
+;;
+
+
+;;; Code:
+
+(require 't-utils)
+(require 'matlab-ts-mode)
+(require 'matlab--access)
+
+
+(defun sweep-test-matlab-ts-grammar--syntax-checker (m-files)
+  "Syntax check each *.m file in M-FILES using the MATLAB codeIssues command.
+
+A temporary MATLAB script that calls codeIssues on each file is written
+and run using \"matlab -batch\".  The temporary script is deleted when
+this function exits, including when an error is signaled.
+
+Returns a hash table where the keys are the M-FILES and each key value
+is a pair, either
+  (\"no-syntax-errors\" . nil)
+  (\"has-syntax-errors\" . \"at line N1:C1 to N2:C2 - DESCRIPTION\")
+
+An error is signaled if matlab is not found, if matlab does not exit
+with status 0, if a result line is not in one of the above forms, or if
+the matlab output lacks a result for one of the M-FILES."
+  (let* ((matlab-exe (or (matlab--get-abs-matlab-exe)
+                         (error "No matlab found (to fix put matlab on your PATH)")))
+         (tmp-check-file (make-temp-file "sweep_test_matlab_ts_grammar" nil ".m"))
+         (check-fun (file-name-sans-extension (file-name-nondirectory tmp-check-file)))
+         (tmp-check-file-dir (file-name-directory tmp-check-file))
+         (result-ht (make-hash-table :test 'equal)))
+
+    ;; unwind-protect so that tmp-check-file is removed on success and on error.
+    (unwind-protect
+        (progn
+          ;; Generate the MATLAB script, tmp-check-file.
+          (with-temp-buffer
+            (insert "filesToCheck = ...
+    [
+")
+
+            (dolist (m-file m-files)
+              ;; Within a MATLAB "string", a double quote is written as two double quotes.
+              (insert "      \"" (replace-regexp-in-string "\"" "\"\"" m-file t t) "\"\n"))
+
+            (insert "    ];
+
+for fIdx = 1:length(filesToCheck)
+    file = filesToCheck(fIdx);
+    issues = codeIssues(file);
+
+    % Syntax errors have error Sererity
+    syntaxErrors = issues.Issues.Severity(:) == matlab.codeanalysis.IssueSeverity.error;
+    if any(syntaxErrors)
+        sIdx = find(syntaxErrors, 1, 'first');
+        syntaxStatus = sprintf(\"has-syntax-errors at line %d:%d to %d:%d - %s\", ...
+                               issues.Issues.LineStart(sIdx), ...
+                               issues.Issues.ColumnStart(sIdx), ...
+                               issues.Issues.LineEnd(sIdx), ...
+                               issues.Issues.ColumnEnd(sIdx), ...
+                               issues.Issues.Description(sIdx));
+    else
+        syntaxStatus = \"no-syntax-errors\";
+    end
+    disp(strcat(\"--> \", file, \" > \", syntaxStatus));
+end
+")
+            (let ((coding-system-for-write 'raw-text-unix))
+              (write-region (point-min) (point-max) tmp-check-file)))
+
+          ;; Run the generated script via: matlab -batch check-fun
+          (with-temp-buffer
+            (cd tmp-check-file-dir) ;; so matlab finds check-fun in its current directory
+            (let ((status (call-process matlab-exe nil t nil "-batch" check-fun)))
+              ;; status is a string describing the signal when matlab was killed, thus
+              ;; compare using `eql' and format it using %s.
+              (when (not (eql status 0))
+                (error "%s -batch %s (in directory %s) returned non-zero status, %s, with output:\n%s"
+                       matlab-exe check-fun tmp-check-file-dir status (buffer-string))))
+            (goto-char (point-min))
+
+            ;; Collect each "--> M-FILE > SYNTAX-STATUS" line into result-ht.
+            (while (not (eobp))
+              (when (looking-at "^--> \\([^>]+\\) > \\(.+\\)$")
+                (let* ((info-line (match-string 0))
+                       (m-file (match-string 1))
+                       (syntax-status (match-string 2))
+                       (syntax-status-pair
+                        (cond
+                         ((string= syntax-status "no-syntax-errors")
+                          (cons syntax-status nil))
+                         ((string-match
+                           "\\`\\(has-syntax-errors\\) \\(at line [0-9]+:[0-9]+.+\\)\\'"
+                           syntax-status)
+                          (cons (match-string 1 syntax-status)
+                                (match-string 2 syntax-status)))
+                         (t
+                          (error "Unexpected result: %s" info-line)))))
+                  (puthash m-file syntax-status-pair result-ht)))
+              (forward-line))
+
+            ;; Validate we got expected stdout:
+            ;;   --> M-FILE1 > SYNTAX-STATUS1
+            ;;   --> M-FILE2 > SYNTAX-STATUS2
+            ;;   ....
+            (dolist (m-file m-files)
+              (when (not (gethash m-file result-ht))
+                (error "%s -batch %s (in directory %s) didn't return expected stdout, got:\n%s"
+                       matlab-exe check-fun tmp-check-file-dir (buffer-string))))))
+
+      (delete-file tmp-check-file))
+    result-ht))
+
+(defun sweep-test-matlab-ts-grammar (&optional directory log-file)
+  "Sweep the matlab tree-sitter parse over all *.m files under DIRECTORY.
+DIRECTORY defaults to the current directory.
+
+  \\[sweep-test-matlab-ts-grammar]
+
+For each file, the matlab tree-sitter parse is compared against the
+MATLAB codeIssues command,
+https://www.mathworks.com/help/matlab/ref/codeissues.html.  When the
+tree-sitter parse has ERROR nodes, codeIssues should report syntax
+issues (issue severity of error) for the file.  Likewise, when the
+tree-sitter parse has no syntax errors, codeIssues should report none.
+
+Messages are logged to LOG-FILE, which defaults to
+sweep-test-matlab-ts-grammar.log
+
+When run interactively, displays the result in a *sweep-test-matlab-ts-grammar*
+buffer, otherwise the results are displayed on stdout.
+
+On large directory trees, run via
+  cd /path/to/your/directory
+  emacs --batch \\
+        -q \\
+        -L /path/to/Emacs-MATLAB-Mode \\
+        -l /path/to/Emacs-MATLAB-Mode/matlab-autoload.el \\
+        -L /path/to/Emacs-MATLAB-Mode/tests/ \\
+        -l /path/to/Emacs-MATLAB-Mode/tests/t-utils.el \\
+        -l /path/to/Emacs-MATLAB-Mode/tests/sweep-test-matlab-ts-grammar \\
+        -f sweep-test-matlab-ts-grammar
+to see the progress messages in your terminal."
+  (interactive)
+  (let ((test-name "sweep-test-matlab-ts-grammar")
+        (sweep-directory (or directory default-directory))
+        (m-file-regexp (rx ".m" eos))
+        (error-nodes-regexp nil)) ;; nil is passed as the optional ERROR-NODES-REGEXP
+    (t-utils-error-if-no-treesit-for 'matlab test-name)
+    (t-utils-sweep-test-ts-grammar test-name
+                                   sweep-directory
+                                   m-file-regexp
+                                   #'matlab-ts-mode
+                                   #'sweep-test-matlab-ts-grammar--syntax-checker
+                                   error-nodes-regexp
+                                   log-file)))
+
+(provide 'sweep-test-matlab-ts-grammar)
+;;; sweep-test-matlab-ts-grammar.el ends here
diff --git a/tests/sweep-test-matlab-ts-grammar.sh
b/tests/sweep-test-matlab-ts-grammar.sh
new file mode 100755
index 0000000000..b84ee00bb2
--- /dev/null
+++ b/tests/sweep-test-matlab-ts-grammar.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/bash
+# File: Emacs-MATLAB-Mode/tests/sweep-test-matlab-ts-grammar.sh
+# Abstract:
+# cd /your/work/directory
+#
+# git clone https://github.com/mathworks/Emacs-MATLAB-Mode.git
+# cd Emacs-MATLAB-Mode
+# make lisp
+#
+# cd /path/to/directory/containing/mFiles
+# /path/to/Emacs-MATLAB-Mode/tests/sweep-test-matlab-ts-grammar.sh
+#
+
+EmacsMATLABModeDir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null
&& cd .. && pwd)
+
+emacs --batch \
+ -q \
+ -L "$EmacsMATLABModeDir" \
+ -l "$EmacsMATLABModeDir/matlab-autoload.el" \
+ -L "$EmacsMATLABModeDir/tests" \
+ -l "$EmacsMATLABModeDir/tests/t-utils.el" \
+ -l "$EmacsMATLABModeDir/tests/sweep-test-matlab-ts-grammar.el" \
+ -f sweep-test-matlab-ts-grammar
diff --git a/tests/sweep-test-matlab-ts-mode-indent.el
b/tests/sweep-test-matlab-ts-mode-indent.el
index f316f37a16..c556363653 100644
--- a/tests/sweep-test-matlab-ts-mode-indent.el
+++ b/tests/sweep-test-matlab-ts-mode-indent.el
@@ -31,8 +31,9 @@
(require 'matlab-ts-mode)
(require 'matlab--access)
-(defvar sweep-test-matlab-ts-mode-indent--mlint (or (matlab--get-mlint-exe)
- (error "MLint not found")))
+(defvar sweep-test-matlab-ts-mode-indent--mlint
+ (or (matlab--get-mlint-exe)
+ (error "MLint not found, is matlab on your PATH?")))
(defun sweep-test-matlab-ts-mode-indent--syntax-checker (file)
"MLint FILE, return pair (VALID . CHECK-RESULT).
@@ -76,7 +77,7 @@ reported which is likely a bug in the tree-sitter parser.
This calls `t-utils-sweep-test-indent' with does a number of
checks to validate the ident rules. When run interactively,
-displays the result in a *t-utils-seep-indent* buffer, otherwise
+displays the result in a *sweep-test-matlab-ts-mode-indent* buffer, otherwise
the results are displayed on stdout."
(let ((test-name "sweep-test-matlab-ts-mode-indent")
diff --git a/tests/t-utils.el b/tests/t-utils.el
index cdcf0b10b2..f110627533 100644
--- a/tests/t-utils.el
+++ b/tests/t-utils.el
@@ -320,20 +320,63 @@ baseline check fails."
test-name lang-file (t-utils--took start-time))
error-msg))
-(defun t-utils--insert-file-for-test (file &optional file-major-mode)
+(defun t-utils--display-result (test-name directory result &optional no-erase)
+  "Show the test RESULT string for TEST-NAME.
+When Emacs is noninteractive, RESULT is shown using `message'.
+Otherwise RESULT is inserted into the *TEST-NAME* buffer, whose
+`default-directory' is set to the truename of DIRECTORY, and that buffer
+is displayed.  The buffer is erased before inserting RESULT unless
+optional NO-ERASE is non-nil, in which case RESULT is appended to the
+existing buffer content."
+  (cond
+   (noninteractive
+    (message "%s" result))
+   (t
+    (let ((buf (get-buffer-create (concat "*" test-name "*"))))
+      (with-current-buffer buf
+        ;; Make the buffer writable while we update it
+        (read-only-mode -1)
+        (buffer-disable-undo)
+        (setq-local default-directory (file-truename directory))
+        (unless no-erase
+          (erase-buffer))
+        ;; Append to existing content, else start the buffer with the header line
+        (if (/= (point-min) (point-max))
+            (goto-char (point-max))
+          (insert "# -*- compilation-minor-mode -*-\n\n"))
+        (insert result)
+        (goto-char (point-min))
+        (text-mode) ;; so we can enable compilation-minor-mode
+        ;; compilation-minor-mode lets us navigate to errors (would be nice to disable "g")
+        (compilation-minor-mode)
+        (set-buffer-modified-p nil)
+        (read-only-mode 1))
+      (display-buffer buf)))))
+
+(defun t-utils--insert-file-for-test (file &optional file-major-mode
skip-corrupt-check)
"Insert FILE into current temporary buffer for testing.
If optional FILE-MAJOR-MODE function is provided, run that, otherwise
we examine the first line of the file for the major mode:
+
-*- MODE-NAME -*-
-or
- -*- mode: MODE-NAME -*-"
+ -*- mode: MODE-NAME -*-
+
+and run that.
+
+If optional SKIP-CORRUPT-CHECK is non-nil, the check for corrupted content is
+skipped."
(insert-file-contents-literally file)
+
+ ;; We're testing a programming language which is using utf-8-unix encoding
+ (set-buffer-file-coding-system 'utf-8-unix)
+
+ ;; Check for corrupted characters (these can crash Emacs via the language server parser)
+ (when (not skip-corrupt-check)
+ (goto-char (point-min))
+ (when (re-search-forward "[^[:print:][:space:]]" nil t)
+ (error "%s appears corrupt, non-printable utf8 character at point %d: %c"
+ file (point) (char-before))))
+
;; CRLF -> LF for consistency between Unix and Windows
(goto-char (point-min))
(while (re-search-forward "\r" nil t)
(replace-match ""))
(goto-char (point-min))
+
;; Set mode
(if file-major-mode
(funcall file-major-mode)
@@ -344,8 +387,10 @@ or
(let* ((mode (match-string 1))
(mode-cmd (intern (concat mode "-mode"))))
(funcall mode-cmd)))
+
;; Incase the mode moves the point, reset to point-min.
(goto-char (point-min))
+
;; Stash away the real buffer file for later use (and return it).
(setq-local t-utils--buf-file file))
@@ -1178,7 +1223,7 @@ The result is:
When run in an interacive Emacs session, e.g.
M-: (sweep-LANGUAGE-ts-mode-indent)
-the result is shown in \"*t-utils-sweep-indent*\" buffer, otherwise it
+the result is shown in \"*TEST-NAME*\" buffer, otherwise it
is displayed on stdout.
After running this, you examine the results to see if there are issues.
@@ -1242,9 +1287,7 @@ LANGUAGE tree-sitter that need addressing or some other
issue."
(format "%s:1: note: indent took
%.3f seconds\n"
file (gethash file
took-ht)))
files))))
- (result (concat "# -*- compilation-minor-mode -*-\n"
- "\n"
- (format "Files-with-parse-error-nodes%s:\n"
+ (result (concat (format "Files-with-parse-error-nodes%s:\n"
(if syntax-checker-fun
"-but-pass-syntax-checker-fun"
""))
@@ -1261,21 +1304,7 @@ LANGUAGE tree-sitter that need addressing or some other
issue."
"Slowest-indents:\n"
slow-files)))
- (if noninteractive
- (message "%s" result)
- (let ((dir default-directory)
- (result-buf (get-buffer-create "*t-utils-sweep-indent*")))
- (with-current-buffer result-buf
- (setq-local default-directory dir)
- (read-only-mode -1)
- (erase-buffer)
- (buffer-disable-undo)
- (insert result)
- (goto-char (point-min))
- (text-mode) ;; so we can enable compilation-minor-mode
- (compilation-minor-mode)
- (read-only-mode 1))
- (display-buffer result-buf))))
+ (t-utils--display-result test-name directory result))
(message "FINISHED: %s %s" test-name (t-utils--took start-time))))
@@ -1588,7 +1617,7 @@ To debug a specific file-encoding test file
;; Load lang-file in temp buffer and activate file-major-mode
(condition-case err
- (t-utils--insert-file-for-test lang-file file-major-mode)
+ (t-utils--insert-file-for-test lang-file file-major-mode
'skip-corrupt-check)
(error
(setq got (concat "Major mode errored with message\n"
(error-message-string err)))))
@@ -1606,5 +1635,190 @@ To debug a specific file-encoding test file
(setq error-msgs (reverse error-msgs))
(should (equal error-msgs '()))))
+(defun t-utils--log (log-file string &optional create)
+  "Write STRING to LOG-FILE without coding conversion.
+STRING is appended to LOG-FILE, unless optional CREATE is non-nil, in
+which case LOG-FILE is written anew with STRING as its content."
+  (let ((append-p (not create))
+        (coding-system-for-write 'no-conversion))
+    (write-region string nil log-file append-p)))
+
+(defun t-utils--log-create (test-name log-file)
+  "Start LOG-FILE anew with \"START: TEST-NAME\" as its content.
+If LOG-FILE is nil, TEST-NAME.log is used.  A message giving the log
+file location is displayed.  Returns the truename of the log file."
+  (let ((log-truename (file-truename (or log-file (concat test-name ".log")))))
+    (t-utils--log log-truename (format "START: %s\n" test-name) t)
+    (message "Logging to: %s" log-truename)
+    log-truename))
+
+(defun t-utils--bad-parse-msg (lang-file parse-issue error-info)
+  "Return a bad parse error message line for LANG-FILE.
+PARSE-ISSUE is a string describing the issue that is placed in the message.
+ERROR-INFO is a string containing \"at line NUM:COL<optional-text>\".
+The returned message has form:
+  LANG-FILE:NUM:COL: error: PARSE-ISSUE ERROR-INFO
+and ends with a newline.  An error is signaled when ERROR-INFO does not
+contain \"at line NUM:COL\"."
+  (if (not (string-match "at line \\([0-9]+\\):\\([0-9]+\\)" error-info))
+      (error "%s bad error-info, %s" lang-file error-info)
+    (let ((line-num (match-string 1 error-info))
+          (col-num (match-string 2 error-info)))
+      (format "%s:%s:%s: error: %s %s\n"
+              lang-file line-num col-num parse-issue error-info))))
+
+(defun t-utils--err-loc (error-node)
+  "Return a string giving the type and location of ERROR-NODE.
+The string has form
+  TYPE node at line N1:C1 to N2:C2 (point P1 to P2)
+where N1:C1 is the line and one based column of the start of ERROR-NODE
+in the current buffer, N2:C2 is the same for its end, and P1 and P2 are
+the start and end points of ERROR-NODE."
+  (let* ((column-at (lambda (pos)
+                      ;; error messages are one based columns
+                      (save-excursion
+                        (goto-char pos)
+                        (1+ (current-column)))))
+         (node-start (treesit-node-start error-node))
+         (node-end (treesit-node-end error-node)))
+    (format "%s node at line %d:%d to %d:%d (point %d to %d)"
+            (treesit-node-type error-node)
+            (line-number-at-pos node-start) (funcall column-at node-start)
+            (line-number-at-pos node-end) (funcall column-at node-end)
+            node-start
+            node-end)))
+
+(defun t-utils-sweep-test-ts-grammar (test-name
+                                      directory
+                                      lang-file-regexp
+                                      major-mode-fun
+                                      syntax-checker-fun
+                                      &optional error-nodes-regexp
+                                      log-file)
+  "Sweep test a tree-sitter grammar shared library looking for parse issues.
+
+File basenames matching LANG-FILE-REGEXP under DIRECTORY recursively are
+examined.  TEST-NAME is used in messages.
+
+Each matching file is read into a temporary buffer and then
+MAJOR-MODE-FUN is called.  This should be a mode that activates
+a tree-sitter grammar, i.e. calls (treesit-parser-create \\='LANGUAGE).
+A file for which this signals an error is logged and skipped.
+
+ERROR-NODES-REGEXP, defaulting to (rx bos \"ERROR\" eos), is provided to
+`treesit-search-subtree' to look for syntax errors in the parse tree.
+
+SYNTAX-CHECKER-FUN is a function that takes a list of files and should
+return a hash table with files as the keys and the value of each key is
+either
+  (cons \"no-syntax-errors\" nil)
+  (cons \"has-syntax-errors\" \"at line N1:COL1 to N2:COL2\")
+
+Progress messages are logged to LOG-FILE which defaults to
+TEST-NAME.log.
+
+The result is:
+
+  Files-with-parse-error-nodes-but-pass-syntax-checker-fun:
+  <files with tree-sitter error nodes>
+
+  Files-that-parsed-succesfully-but-failed-syntax-checker-fun:
+  <files without tree-sitter error nodes>
+
+  Total-consistently-parsed-files: M of N
+
+When run in an interactive Emacs session, e.g.
+  M-: (sweep-LANGUAGE-ts-mode-grammar)
+the result is shown in \"*TEST-NAME*\" buffer,
+otherwise the result is displayed on stdout.
+
+A `user-error' is signaled if no files match, if all matching files are
+skipped, or if SYNTAX-CHECKER-FUN does not return a valid result for a
+checked file."
+
+  (when (not error-nodes-regexp)
+    (setq error-nodes-regexp (rx bos "ERROR" eos)))
+
+  (setq log-file (t-utils--log-create test-name log-file))
+  (when (not noninteractive)
+    (t-utils--display-result test-name directory (concat "Log: " log-file "\n\n")))
+
+  (let ((start-time (current-time))
+        ;; `sort' is given a predicate because the one argument form requires Emacs 30.
+        (all-lang-files (sort (mapcar #'file-truename ;; Expand "~" for the syntax-checker-fun
+                                      (directory-files-recursively directory lang-file-regexp))
+                              #'string<))
+        (lang-files-to-check '())
+        (ts-parse-result-ht (make-hash-table :test 'equal)))
+
+    (when (= (length all-lang-files) 0)
+      (user-error "No files found in directory %s recursively matching regexp \"%s\""
+                  directory lang-file-regexp))
+    (t-utils--log log-file (format "Found %d files to check %s\n"
+                                   (length all-lang-files) (t-utils--took start-time)))
+
+    ;; Tree-sitter parse each file recording whether or not it has error nodes.
+    (dolist (lang-file all-lang-files)
+      (with-temp-buffer
+        (let (ok)
+          (t-utils--log log-file (format "Reading: %s\n" lang-file))
+          (condition-case err
+              (progn
+                (t-utils--insert-file-for-test lang-file major-mode-fun)
+                (setq ok t))
+            (error
+             (t-utils--log log-file (format "Skipping %s, %s\n"
+                                            lang-file (error-message-string err)))))
+          (when ok
+            (push lang-file lang-files-to-check)
+            (let* ((root (treesit-buffer-root-node))
+                   (error-node (treesit-search-subtree root error-nodes-regexp nil t))
+                   (syntax-status-pair (if error-node
+                                           (cons "has-syntax-errors"
+                                                 (t-utils--err-loc error-node))
+                                         (cons "no-syntax-errors" nil))))
+              (puthash lang-file syntax-status-pair ts-parse-result-ht)
+              (t-utils--log log-file (format "ts-parse: %s > %S\n"
+                                             lang-file syntax-status-pair)))))))
+
+    (when (= (length lang-files-to-check) 0)
+      (user-error "No files to check (all skipped)\n"))
+    (setq lang-files-to-check (sort lang-files-to-check #'string<))
+    (t-utils--log log-file (format "Checking %d files\n" (length lang-files-to-check)))
+
+    (t-utils--log log-file (format "Calling %S\n" syntax-checker-fun))
+    (let ((syntax-check-result-ht (funcall syntax-checker-fun lang-files-to-check))
+          ;; Messages are pushed (reverse order) and joined once when building the result.
+          (bad-ts-error-parse-msgs '())
+          (bad-ts-success-parse-msgs '())
+          (n-consistent-files 0))
+
+      (t-utils--log log-file (format "Examinging %S result\n" syntax-checker-fun))
+
+      ;; Compare the tree-sitter parse status with the syntax-checker-fun status.
+      (dolist (lang-file lang-files-to-check)
+        (let* ((ts-parse-file-result-pair (gethash lang-file ts-parse-result-ht))
+               (syntax-check-file-result-pair (gethash lang-file syntax-check-result-ht))
+               (ts-status (car ts-parse-file-result-pair))
+               (syntax-check-status (car syntax-check-file-result-pair)))
+          (when (not (member syntax-check-status '("has-syntax-errors" "no-syntax-errors")))
+            (user-error "Bad hash %S, %s -> %S"
+                        syntax-check-result-ht lang-file syntax-check-file-result-pair))
+          (cond
+           ((string= ts-status syntax-check-status)
+            (setq n-consistent-files (1+ n-consistent-files)))
+           ;; ts says syntax errors, syntax-check says no errors
+           ((string= ts-status "has-syntax-errors")
+            (push (t-utils--bad-parse-msg lang-file
+                                          "bad tree-sitter parse"
+                                          (cdr ts-parse-file-result-pair))
+                  bad-ts-error-parse-msgs))
+           ;; ts says no syntax errors, syntax-check says have errors
+           ((string= ts-status "no-syntax-errors")
+            (push (t-utils--bad-parse-msg lang-file
+                                          "tree-sitter did not detect error"
+                                          (cdr syntax-check-file-result-pair))
+                  bad-ts-success-parse-msgs))
+           (t
+            ;; Not expected because ts-status is set above to one of the two statuses.
+            (error "Unexpected tree-sitter parse status %S for %s" ts-status lang-file)))))
+
+      (let ((result
+             (concat
+              "Files-with-parse-error-nodes-but-pass-syntax-checker-fun:\n"
+              (mapconcat #'identity (nreverse bad-ts-error-parse-msgs) "")
+              "\n"
+              "Files-that-parsed-succesfully-but-failed-syntax-checker-fun:\n"
+              (mapconcat #'identity (nreverse bad-ts-success-parse-msgs) "")
+              "\n"
+              "Total-consistently-parsed-files: "
+              (format "%d of %d\n" n-consistent-files (length lang-files-to-check)))))
+        (t-utils--display-result test-name directory result 'no-erase)))
+
+    (t-utils--log log-file (format "FINISHED: %s %s\n" test-name (t-utils--took start-time)))
+    (message "Finished, see: %s" log-file)))
+
(provide 't-utils)
;;; t-utils.el ends here