branch: elpa/subed
commit 159353949b89da76f85de607a1b271df7dc3dd7a
Author: Sacha Chua <[email protected]>
Commit: Sacha Chua <[email protected]>
New command subed-vtt-combine-separate-speaker-files
* subed/subed-vtt.el (subed-vtt-combine-separate-speaker-files):
New command.
* tests/test-subed-vtt.el ("subed-vtt"): Add test.
---
NEWS.org | 3 +++
README.org | 4 ++++
subed/subed-vtt.el | 62 ++++++++++++++++++++++++++++++++++++++++++++++---
tests/test-subed-vtt.el | 39 ++++++++++++++++++++++++++++++-
4 files changed, 104 insertions(+), 4 deletions(-)
diff --git a/NEWS.org b/NEWS.org
index 130b91de4f..1286da6094 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,6 +1,9 @@
#+OPTIONS: toc:nil
* subed news
+** Development
+
+- New command ~subed-vtt-combine-separate-speaker-files~ can combine VTT
files from separate speakers, such as when you transcribe different tracks of a conversation.
** Version 1.4.1 - 2026-02-16 - Sacha Chua
diff --git a/README.org b/README.org
index 313aba0090..56a2999642 100644
--- a/README.org
+++ b/README.org
@@ -569,6 +569,10 @@ Combining the two:
nodemon -w "tests/*.el" -w "subed/*.el" -x "PATTERN=regexp make test-some"
#+end_src
+#+begin_src emacs-lisp
+(setq coverage-dir (expand-file-name "./coverage/"))
+#+end_src
+
** Contributions
:PROPERTIES:
:CUSTOM_ID: subed-contributions
diff --git a/subed/subed-vtt.el b/subed/subed-vtt.el
index 4d2c7f12bc..553bb694d5 100644
--- a/subed/subed-vtt.el
+++ b/subed/subed-vtt.el
@@ -106,11 +106,11 @@ format-specific function for MAJOR-MODE."
;; Move to first subtitle that starts at or after MSECS
(catch 'subtitle-id
(while (<= (or (subed-subtitle-msecs-start) -1) msecs)
- ;; If stop time is >= MSECS, we found a match
- (let ((cur-sub-end (subed-subtitle-msecs-stop)))
+ ;; If stop time is >= MSECS, we found a match
+ (let ((cur-sub-end (subed-subtitle-msecs-stop)))
(when (and cur-sub-end (>= cur-sub-end msecs))
(throw 'subtitle-id (subed-subtitle-id))))
- (unless (subed-forward-subtitle-id)
+ (unless (subed-forward-subtitle-id)
(throw 'subtitle-id nil))))))
;;; Traversing
@@ -614,6 +614,62 @@ Use the format-specific function for MAJOR-MODE."
(interactive)
(insert "<" (subed-msecs-to-timestamp subed-mpv-playback-position) ">"))
+;;;###autoload
+(defun subed-vtt-combine-separate-speaker-files (output-file subtitle-info)
+  "Combine subtitles from separate per-speaker VTT sources.
+SUBTITLE-INFO is an alist keyed by speaker label.  Each entry is either
+
+\(LABEL . \"/path/to/label.vtt\")
+
+or
+
+\(LABEL SUBTITLE SUBTITLE ...)
+
+where each SUBTITLE is a list like the result of `subed-subtitle'.
+File entries are read with `subed-parse-file'.
+
+Wrap each subtitle's text in a VTT voice tag, <v LABEL>...</v>, and
+sort the combined subtitles by start time (their second element).
+The subtitles passed in SUBTITLE-INFO are copied, not modified.
+
+If OUTPUT-FILE is a string, write the result to that file with
+`subed-create-file'.  Any other value (for example t or nil) skips
+writing.  In both cases, return the combined list of subtitles.
+
+When called interactively, prompt for the output file, then prompt
+for labels and input files until a blank label is entered."
+  (interactive
+   (list
+    (read-file-name "Output VTT: ")
+    (let (results label)
+      (while (not (string= (setq label (read-string "Label: ")) ""))
+        (push (cons label (read-file-name (format "VTT for %s: " label)))
+              results))
+      (nreverse results))))
+  (let ((results
+         ;; Two-argument `sort' works on every Emacs version; the keyword
+         ;; form (:key) needs Emacs 30.  The list from `seq-mapcat' is
+         ;; fresh, so sorting it destructively is safe.
+         (sort
+          (seq-mapcat
+           (lambda (group)
+             (mapcar
+              (lambda (sub)
+                ;; Copy before tagging so the caller's subtitle (which
+                ;; may be a quoted constant) is left untouched.
+                (let ((tagged (copy-sequence sub)))
+                  (setf (elt tagged 3)
+                        (format "<v %s>%s</v>" (car group) (elt sub 3)))
+                  tagged))
+              (if (listp (cdr group))
+                  (cdr group)
+                (subed-parse-file (cdr group)))))
+           subtitle-info)
+          (lambda (a b) (< (elt a 1) (elt b 1))))))
+    (when (stringp output-file)
+      (subed-create-file output-file results t))
+    results))
+
;;;###autoload
(add-to-list 'auto-mode-alist '("\\.vtt\\'" . subed-vtt-mode))
diff --git a/tests/test-subed-vtt.el b/tests/test-subed-vtt.el
index 8315c9beb8..170076d441 100644
--- a/tests/test-subed-vtt.el
+++ b/tests/test-subed-vtt.el
@@ -2701,4 +2701,41 @@ hi<00:00:03.459><c> welcome</c><00:00:03.850><c>
to</c><00:00:03.999><c> another
(insert "WebVTT\n\n00:00:00.003 --> 00:00:05.123\nThis is
<00:00:01.000>is a test\n\n")
(re-search-backward "01\\.000")
(forward-char 2)
- (expect (subed-timestamp-at-point) :to-equal "00:00:01.000")))))
+ (expect (subed-timestamp-at-point) :to-equal "00:00:01.000"))))
(describe "combining subtitles from multiple speakers"
  (it "works for subtitle lists."
    (expect
     (subed-vtt-combine-separate-speaker-files
      t '(("Host"                       ; OUTPUT-FILE t: return only, no file
           (nil 1000 2000 "Sentence A")
           (nil 4000 5000 "Sentence B"))
          ("Guest"
           (nil 500 2000 "Sentence 1")
           (nil 2500 2700 "Sentence 2")
           (nil 2701 2800 "Sentence 3"))))
     :to-equal
     '((nil 500 2000 "<v Guest>Sentence 1</v>")
       (nil 1000 2000 "<v Host>Sentence A</v>")
       (nil 2500 2700 "<v Guest>Sentence 2</v>")
       (nil 2701 2800 "<v Guest>Sentence 3</v>")
       (nil 4000 5000 "<v Host>Sentence B</v>"))))
  (it "works for subtitle files."
    (expect
     (cl-letf (((symbol-function 'subed-parse-file) ; stub: no files are read
                (lambda (filename &optional _mode-func)
                  (if (string-match "host" filename)
                      '((nil 1000 2000 "Sentence A")
                        (nil 4000 5000 "Sentence B"))
                    '((nil 500 2000 "Sentence 1")
                      (nil 2500 2700 "Sentence 2")
                      (nil 2701 2800 "Sentence 3"))))))
       (subed-vtt-combine-separate-speaker-files
        t '(("Host" . "/tmp/host.vtt")
            ("Guest" . "/tmp/guest.vtt"))))
     :to-equal
     '((nil 500 2000 "<v Guest>Sentence 1</v>")
       (nil 1000 2000 "<v Host>Sentence A</v>")
       (nil 2500 2700 "<v Guest>Sentence 2</v>")
       (nil 2701 2800 "<v Guest>Sentence 3</v>")
       (nil 4000 5000 "<v Host>Sentence B</v>"))))))