branch: elpa/subed
commit 88a64355523a3bc65790afc7fe6e852183df6df4
Author: Sacha Chua <[email protected]>
Commit: Sacha Chua <[email protected]>
subed-align, subed-word-data: word-level timestamps
You can now use Montreal Forced Aligner or Aeneas
to generate word timestamps using
subed-align-mfa-set-word-data or
subed-align-aeneas-set-word-data.
* subed/subed-align.el (subed-align-mfa-conda-env):
New variable. I should probably make this a defcustom someday.
(subed-align-mfa-command): New variable.
(subed-align-mfa-dictionary): New variable.
(subed-align-aeneas-set-word-data):
New command to use Aeneas for word timestamps.
(subed-align-mfa-process-environment): New.
(subed-align-mfa-set-word-data): New command.
* subed/subed-word-data.el
(subed-word-data-compare-normalized-string-distance):
New function for approximate matching.
(subed-word-data-compare-normalized-string-distance-threshold):
Use approximate matching by default.
(subed-word-data-subtitle-entries):
Use renamed fuzz ms.
(subed-word-data-fuzz-ms): Renamed.
(subed-word-data-refresh-text-properties-for-subtitle):
Use fuzz variable.
(subed-word-data-refresh-text-properties-for-subtitle):
Use fuzz variable.
(subed-word-data-refresh-text-properties): Use
fuzz variable.
---
NEWS.org | 5 ++
subed/subed-align.el | 129 +++++++++++++++++++++++++++++++++++++++++++++++
subed/subed-word-data.el | 29 ++++++++---
subed/subed.el | 2 +-
4 files changed, 156 insertions(+), 9 deletions(-)
diff --git a/NEWS.org b/NEWS.org
index 7f4f17a631..130b91de4f 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -2,6 +2,11 @@
* subed news
+** Version 1.4.1 - 2026-02-16 - Sacha Chua
+
+- You can now use Montreal Forced Aligner or Aeneas to generate word
timestamps using ~subed-align-mfa-set-word-data~ or
~subed-align-aeneas-set-word-data~.
+- subed-word-data now uses approximate matches by default.
+
** Version 1.4.0 - 2026-02-10 - Sacha Chua
- subed-word-data.el can now load word timing from TextGrid files produced by
the Montreal Forced Aligner.
diff --git a/subed/subed-align.el b/subed/subed-align.el
index f5eb8c8d90..b916f8904e 100644
--- a/subed/subed-align.el
+++ b/subed/subed-align.el
@@ -41,6 +41,14 @@
Ex:
task_adjust_boundary_nonspeech_min=0.500|task_adjust_boundary_nonspeech_string=REMOVE
will remove silence and other non-speech spans.")
+(defvar subed-align-mfa-conda-env "~/miniconda/env/aligner"
+ "Set this to the path to your Conda environment.")
+(defvar subed-align-mfa-command '("mfa" "align")
+ "Command to run the Montreal Forced Aligner.")
+(defvar subed-align-mfa-dictionary "french_mfa")
+(defvar subed-align-mfa-acoustic-model "french_mfa")
+
+
;;;###autoload
(defun subed-align-region (audio-file beg end)
"Align just the given section."
@@ -164,6 +172,127 @@ Return the new filename."
(find-file new-file))
new-file)))
+;; TODO Scope it to the region
+(defun subed-align-aeneas-set-word-data (audio-file)
+ "Align AUDIO-FILE with TEXT-FILE to get timestamps in FORMAT.
+Store the word data in `subed-word-data--cache' for use by subed-word-data.
+This uses the Aeneas forced aligner."
+ (interactive
+ (list
+ (or
+ (subed-media-file)
+ (subed-guess-media-file subed-audio-extensions)
+ (read-file-name "Audio file: "))))
+ (unless (derived-mode-p 'subed-mode)
+ (error "Must be in `subed-mode' buffer."))
+ (let ((temp-input (make-temp-file "subed-align-input" nil ".txt"))
+ (temp-output (make-temp-file "subed-align-output" nil ".txt"))
+ (json-object-type 'alist)
+ (json-array-type 'list)
+ data)
+ (write-region
+ (if (derived-mode-p 'subed-mode)
+ (mapconcat
+ (lambda (o) (elt o 3)) (subed-subtitle-list)
+ "\n\n")
+ (buffer-string))
+ nil temp-input)
+ (apply
+ #'call-process
+ (car subed-align-command)
+ nil
+ (get-buffer-create "*subed-aeneas*")
+ t
+ (append (cdr subed-align-command)
+ (list (expand-file-name audio-file)
+ temp-input
+ (format
"task_language=%s|is_text_type=mplain|os_task_file_levels=3|os_task_file_format=json%s"
+ subed-align-language
+ (if subed-align-options (concat "|"
subed-align-options) ""))
+ temp-output)))
+ (setq data (mapcar
+ (lambda (o)
+ (let-alist o
+ `((start . ,(* 1000 (string-to-number .begin)))
+ (end . ,(* 1000 (string-to-number .end)))
+ (text . ,(string-join .lines " ")))))
+ (alist-get 'fragments
+ (with-temp-buffer
+ (insert-file-contents temp-output)
+ (goto-char (point-min))
+ (json-read)))))
+ (subed-word-data--load data)
+ (delete-file temp-input)
+ (delete-file temp-output)
+ data))
+
+(defun subed-align-mfa-process-environment ()
+ "Return process-environment with conda env activated."
+ (let* ((env-bin (expand-file-name "bin" subed-align-mfa-conda-env))
+ (env-lib (expand-file-name "lib" subed-align-mfa-conda-env))
+ (path-var (concat env-bin ":" (getenv "PATH")))
+ (ld-library-path (concat env-lib ":" (or (getenv "LD_LIBRARY_PATH")
""))))
+ (list (concat "PATH=" path-var)
+ (concat "LD_LIBRARY_PATH=" ld-library-path)
+ (concat "CONDA_PREFIX=" (expand-file-name subed-align-mfa-conda-env))
+ (concat "CONDA_DEFAULT_ENV=aligner"))))
+
+;;;###autoload
+(defun subed-align-mfa-set-word-data (audio-file)
+ "Set the word data using Montreal Forced Aligner."
+ (interactive
+ (list
+ (or
+ (subed-media-file)
+ (subed-guess-media-file subed-audio-extensions)
+ (read-file-name "Audio file: "))))
+ ;; MFA expects audio and text
+ (let* ((temp-input (make-temp-file "subed-align-mfa-input" t))
+ (temp-output (make-temp-file "subed-align-mfa-output" t))
+ (input-wav (expand-file-name "input.wav" temp-input)))
+ ;; Set up input.wav
+ (if (string= (downcase (file-name-extension audio-file)) "wav")
+ (copy-file audio-file input-wav)
+ (call-process
+ subed-ffmpeg-executable
+ nil (get-buffer-create "*mfa*") nil
+ "-i"
+ audio-file
+ "-ar"
+ "16000"
+ input-wav))
+ ;; Set up input.txt
+ (write-region
+ (if (derived-mode-p 'subed-mode)
+ (mapconcat
+ (lambda (o) (elt o 3)) (subed-subtitle-list)
+ "\n\n")
+ (buffer-string))
+ nil (expand-file-name "input.txt" temp-input))
+ (let ((process-environment (append
+ (and subed-align-mfa-conda-env
(subed-align-mfa-process-environment))
+ process-environment)))
+ (apply #'call-process
+ (if subed-align-mfa-conda-env
+ (expand-file-name
+ (car subed-align-mfa-command)
+ (expand-file-name
+ "bin"
+ subed-align-mfa-conda-env))
+ (car subed-align-mfa-command))
+ nil (get-buffer-create "*mfa*") nil
+ (append
+ (cdr subed-align-mfa-command)
+ (list
+ temp-input
+ subed-align-mfa-dictionary
+ subed-align-mfa-acoustic-model
+ temp-output))))
+ (subed-word-data-load-from-file (expand-file-name "input.TextGrid"
+ temp-output))
+ (delete-directory temp-input t)
+ (delete-directory temp-output t)))
+
(defun subed-align-reinsert-comments (subtitles)
"Reinsert the comments from SUBTITLES.
Assume that the subtitles are still in the same sequence."
diff --git a/subed/subed-word-data.el b/subed/subed-word-data.el
index 46268e1371..29f07cfaee 100644
--- a/subed/subed-word-data.el
+++ b/subed/subed-word-data.el
@@ -257,7 +257,19 @@ For now, only JSON or SRV2 files are supported."
(string= (subed-word-data-normalize-word word1)
(subed-word-data-normalize-word word2)))
-(defvar subed-word-data-compare-function
'subed-word-data-compare-normalized-string=
+(defvar subed-word-data-compare-normalized-string-distance-threshold 0.2
+ "Factor used for similarity comparison.")
+
+(defun subed-word-data-compare-normalized-string-distance (word1 word2)
+ "Compare two words and return t if they are similar enough after
normalization.
+See `subed-word-data-compare-normalized-string-distance-threshold'."
+ (let ((w1 (subed-word-data-normalize-word word1))
+ (w2 (subed-word-data-normalize-word word2)))
+ (< (/ (string-distance w1 w2)
+ (* 1.0 (max (length w1) (length w2))))
+ subed-word-data-compare-normalized-string-distance-threshold)))
+
+(defvar subed-word-data-compare-function
'subed-word-data-compare-normalized-string-distance
"Function to use to compare.")
(defun subed-word-data-compare (word1 word2)
@@ -306,10 +318,10 @@ Return non-nil if they are the same after normalization."
(let ((time (assoc-default 'start (subed-word-data--look-up-word))))
(when time (- time subed-subtitle-spacing))))))
-(defun subed-word-data-subtitle-entries (&optional fuzz-factor)
+(defun subed-word-data-subtitle-entries ()
"Return the entries that start and end within the current subtitle."
- (let ((start (- (subed-subtitle-msecs-start) (or fuzz-factor
subed-subtitle-spacing)))
- (stop (+ (subed-subtitle-msecs-stop) (or fuzz-factor
subed-subtitle-spacing))))
+ (let ((start (- (subed-subtitle-msecs-start) subed-word-data-fuzz-ms))
+ (stop (+ (subed-subtitle-msecs-stop) subed-word-data-fuzz-ms)))
(seq-filter
(lambda (o)
(and (<= (or (alist-get 'end o) most-positive-fixnum) stop)
@@ -319,7 +331,7 @@ Return non-nil if they are the same after normalization."
(defvar subed-word-data-threshold 5
"Number of words to consider for matching.")
-(defvar subed-word-data-fuzz-factor 200
+(defvar subed-word-data-fuzz-ms 500
"Milliseconds to consider before or after a subtitle.")
(defun subed-word-data-refresh-text-properties-for-subtitle ()
@@ -329,7 +341,7 @@ Return non-nil if they are the same after normalization."
'(subed-word-data-start subed-word-data-end
font-lock-face))
(let* ((text-start (progn (subed-jump-to-subtitle-text) (point)))
pos
- (word-data (reverse (subed-word-data-subtitle-entries
subed-word-data-fuzz-factor)))
+ (word-data (reverse (subed-word-data-subtitle-entries)))
candidate
cand-count)
(subed-jump-to-subtitle-end)
@@ -338,12 +350,13 @@ Return non-nil if they are the same after normalization."
(setq pos (point))
(backward-word)
(let ((try-list word-data)
+ (current-word (buffer-substring (point) pos))
candidate)
(setq candidate (car try-list) cand-count 0)
(setq try-list (cdr try-list))
(while (and candidate
(< cand-count subed-word-data-threshold)
- (not (subed-word-data-compare (buffer-substring (point)
pos)
+ (not (subed-word-data-compare current-word
(alist-get 'text candidate))))
(setq candidate (car try-list) cand-count (1+ cand-count))
(when (> cand-count subed-word-data-threshold)
@@ -415,7 +428,7 @@ This only works for VTTs."
(while (not (eobp))
(let* ((text-start (progn (subed-jump-to-subtitle-text) (point)))
pos
- (word-data (reverse (subed-word-data-subtitle-entries
subed-word-data-fuzz-factor)))
+ (word-data (reverse (subed-word-data-subtitle-entries)))
candidate)
(subed-jump-to-subtitle-end)
(while (> (point) text-start)
diff --git a/subed/subed.el b/subed/subed.el
index 087d32c59f..721bc6a2df 100644
--- a/subed/subed.el
+++ b/subed/subed.el
@@ -1,6 +1,6 @@
;;; subed.el --- A major mode for editing subtitles -*- lexical-binding: t;
-*-
-;; Version: 1.4.0
+;; Version: 1.4.1
;; Maintainer: Sacha Chua <[email protected]>
;; Author: Random User
;; Keywords: convenience, files, hypermedia, multimedia