Warning: this is an htmlized version!
The original is here, and the conversion rules are here.
#######
#
# E-scripts on processing and editing subtitles.
#
# Note 1: use the eev command (defined in eev.el) and the
# ee alias (in my .zshrc) to execute parts of this file.
# Executing this file as a whole makes no sense.
# An introduction to eev can be found here:
#
#   (find-eev-quick-intro)
#   http://angg.twu.net/eev-intros/find-eev-quick-intro.html
#
# Note 2: be VERY careful and make sure you understand what
# you're doing.
#
# Note 3: If you use a shell other than zsh things like |&
# and the for loops may not work.
#
# Note 4: I always run as root.
#
# Note 5: some parts are too old and don't work anymore. Some
# never worked.
#
# Note 6: the definitions for the find-xxxfile commands are on my
# .emacs.
#
# Note 7: if you see a strange command check my .zshrc -- it may
# be defined there as a function or an alias.
#
# Note 8: the sections without dates are always older than the
# sections with dates.
#
# This file is at <http://angg.twu.net/e/subtitles.e>
#           or at <http://angg.twu.net/e/subtitles.e.html>.
#        See also <http://angg.twu.net/emacs.html>,
#                 <http://angg.twu.net/.emacs[.html]>,
#                 <http://angg.twu.net/.zshrc[.html]>,
#                 <http://angg.twu.net/escripts.html>,
#             and <http://angg.twu.net/>.
#
#######

# «.subed» (to "subed")
# «.subed-mpv-low-level» (to "subed-mpv-low-level")
# «.subed-keys» (to "subed-keys")
# «.subed-git» (to "subed-git")
# «.waveform-test» (to "waveform-test")
# «.waveform-test-2» (to "waveform-test-2")
# «.mpv-geometry» (to "mpv-geometry")
# «.pysubs2» (to "pysubs2")
# «.gryms-code» (to "gryms-code")
# «.Subtitles.lua» (to "Subtitles.lua")
# «.aeneas» (to "aeneas")
# «.angle-brackets» (to "angle-brackets")
# «.waveforms» (to "waveforms")

<edrx> sachac: I used this to download the automatic subtitles of the video that I have just recorded - https://0x0.st/oTrh.txt - but the .vtt file looks like this: https://0x0.st/oTrC.txt
<edrx> sachac: this is tricky to edit because of the timestamps on words - like <00:00:04.400><c> and</c><00:00:04.880><c> this</c><00:00:05.120>...
<edrx> sachac: do you have tricks to convert that to another format? I tried pysubs2 - more precisely: pip3 install pysubs2; pysubs2 --help; pysubs2 --to srt *.vtt


#####
#
# subed
# 2022apr19
#
#####

# «subed» (to ".subed")
# (find-es "emacs" "subed")
# (find-epackage-links 'subed "subed" t)
# (find-epackage 'subed)
# (code-c-d "subed" "~/.emacs.d/elpa/subed-1.0.10/")
# (code-c-d "subed" "~/.emacs.d/elpa/subed/")
# (find-subedfile "")
# (find-subedfile "subed/subed-mpv.el")
# https://github.com/sachac/subed
# (find-subedgrep "grep --color=auto -niH --null -e cps *")
# (find-es "mplayer" "geometry")
-geometry 400x300+520+100

# (find-efunctiondescr 'customize-apropos)
# (find-efunction 'customize-apropos)
# (find-efunction 'customize-apropos "apropos-read-pattern")
# (customize-apropos "")
# (customize-apropos "subed")
# (find-customizegroup 'subed)
# (customize-group 'subed)
# (find-efunction 'find-customizegroup)
# (customize-variable 'subed-default-subtitle-length)
# (customize-option 'subed-default-subtitle-length)


#####
#
# Low-level tests for subed-mpv.el
# 2022oct09
#
#####

# «subed-mpv-low-level» (to ".subed-mpv-low-level")
# (find-fline "~/LOGS/2022oct09.emacs" "<quiliro> edrx: subed-mpv is not found")
# (to "subed")
# (require 'subed-mpv)
# (find-eapropos "subed-mpv")
# (find-efunction 'subed-mpv--play)
# (find-efunction 'subed-mpv-jump)
# (find-efunction 'subed-mpv-pause)
# (find-efunction 'subed-mpv-unpause)

;; Check that you have the video:
(find-1stclassvideo-links "2022pict2elua")
(find-2022pict2eluavideo "0:00")

;; Copy it and its subtitles to /tmp/:
(find-eevvideossh0 "cp -v 2022-pict2e-lua.{mp4,vtt} /tmp/")
(setq my-video "/tmp/2022-pict2e-lua.mp4")

;; Then...
;; (Evaluate these one at a time: start mpv on `my-video', seek twice,
;; then pause and unpause.)
(require 'subed-mpv)
(subed-mpv--play my-video)
(subed-mpv-jump 20000)
(subed-mpv-jump 200000)
(subed-mpv-pause)
(subed-mpv-unpause)


#####
#
# subed-keys
# 2022jul15
#
#####

# «subed-keys» (to ".subed-keys")
# (find-eevvideosfile "2022jul10-apresentacao-C2.vtt")
# (find-es "emacs" "transparence-in-keymaps")
# (find-subedfile "subed.el" "(defconst subed-mode-map")
# (require 'subed)
# (find-ekeymapdescr subed-mode-map)

;; M-j subed-mpv-jump-to-current-subtitle
;; M-k subed-kill-subtitle

;; Remove these bindings from `subed-mode-map', so that in subed buffers
;; the keys fall through to whatever the other active keymaps bind them to.
(define-key subed-mode-map (kbd "M-j") nil)
(define-key subed-mode-map (kbd "M-k") nil)
(define-key subed-mode-map (kbd "M-p") nil)

;; `M-x j' and `M-x k' keep the two commands listed above reachable
;; after their M-j / M-k bindings have been removed.
(defun j () (interactive) (subed-mpv-jump-to-current-subtitle))
(defun k () (interactive) (subed-kill-subtitle))

;; Three alternative definitions of `sk'. Evaluate only the one you
;; want: each `defun' replaces the previous definition, so the last
;; one evaluated wins.
(defun sk () (interactive) (find-es "subtitles" "subed-keys"))
(defun sk () (interactive) (find-2b '(find-es "subtitles" "subed-keys")))
(defun sk () (interactive) (find-2a '(find-subedfile "subed.el" "(defconst subed-mode-map")))


#####
#
# subed-git
# 2023apr09
#
#####

# «subed-git» (to ".subed-git")
# (find-epackage-links 'subed "subed" t)
# (find-es "emacs" "package-vc-install")
# (package-vc-install "https://github.com/sachac/subed")
# https://github.com/sachac/subed/issues/64
# (find-git-links "https://github.com/sachac/subed" "subed")

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
rm -Rfv ~/usrc/subed/
cd ~/usrc/
git clone https://github.com/sachac/subed
cd ~/usrc/subed/

export PAGER=cat
git branch --list -a
git for-each-ref
git log --oneline --graph --all -20

git checkout main
git checkout waveform

# (find-fline "~/usrc/")
# (find-fline "~/usrc/subed/")
# (find-gitk "~/usrc/subed/")

# (code-c-d "subed" "~/usrc/subed/")
# (code-c-d "subeds" "~/usrc/subed/subed/")
# (find-subedfile "")
# (find-subedsfile "")
# (find-subedsh "find * | sort")
# (find-subedfile "subed/subed-waveform.el")
# (find-subedsgrep "grep --color=auto -nH --null -e auto-mode *.el")

# (find-1stclassvideo-links "eev2021")
# (find-psne-1stclassvideo-links "eev2021")

# (add-to-list 'load-path "~/usrc/subed/subed/")
# (find-es "emacs" "locate-library")
# (locate-library "subed-waveform.el")
# (find-subedsfile "subed-waveform.el")
# (find-subedsfile "subed-vtt.el")

(require 'subed)
(require 'subed-waveform)
(require 'subed-vtt)
(find-subedsfile "subed.el")

# Variant 1: copy the video and its subtitles from
# $S/http/anggtwu.net/eev-videos/ into /tmp/subtest/, keeping their names.
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# (find-fline "/tmp/subtest/")
# (find-fline "/tmp/subtest/emacsconf2021.vtt")
rm -Rv /tmp/subtest/
mkdir /tmp/subtest/
cd /tmp/subtest/
# wget http://anggtwu.net/eev-videos/emacsconf2021.mp4
# wget http://anggtwu.net/eev-videos/emacsconf2021.vtt
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.mp4 .
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.vtt .

# Variant 2: download them into /tmp/sub/ as a.mp4 and a.vtt.
• (eepitch-shell)
• (eepitch-kill)
• (eepitch-shell)
rm -Rv /tmp/sub/
mkdir /tmp/sub/
cd /tmp/sub/
wget -O a.mp4 http://anggtwu.net/eev-videos/emacsconf2021.mp4
wget -O a.vtt http://anggtwu.net/eev-videos/emacsconf2021.vtt

•• (find-evariable 'subed-mpv-media-file)
• (setq subed-mpv-media-file "/tmp/sub/a.mp4")

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
# We will use the local copy of this video:
# (find-1stclassvideo-links "eev2021")
rm -Rv /tmp/sub/
mkdir /tmp/sub/
cd /tmp/sub/
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.mp4 a.mp4
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.vtt a.vtt

# Now visit its .vtt,
# type M-x eeit between two subtitles,
# and try the test block...
* (find-fline "/tmp/sub/a.vtt")

subed-mpv--socket, subed-mpv--server-start, subed-mpv--server-stop


#####
#
# Test subed-waveform
# 2023aug03
#
#####

# «waveform-test» (to ".waveform-test")

** Part 0: make sure that you don't have subed
** installed as an Emacs package. If this
** (locate-library "subed.el")
** points to a subed.el outside ~/usrc/subed/,
** then the simplest way to get rid of that
** subed-the-package is to run the right sexps
** from the temporary buffer generated by this:
** (find-epackage-links 'subed)

** Part 1: install the waveform branch of subed
** in ~/usrc/subed/ and run some requires.
**
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
mkdir -p ~/usrc/subed/
rm -Rfv ~/usrc/subed/
cd ~/usrc/
git clone https://github.com/sachac/subed
cd ~/usrc/subed/
export PAGER=cat
git branch --list -a
git for-each-ref
git log --oneline --graph --all -20
# git checkout main
git checkout waveform

# (find-fline "~/usrc/")
# (find-fline "~/usrc/subed/")
# (find-gitk "~/usrc/subed/")

* (code-c-d "subed" "~/usrc/subed/")
* (code-c-d "subeds" "~/usrc/subed/subed/")
** (find-subedfile "")
** (find-subedsfile "")
** (find-subedsh "find * | sort")
** (find-subedssh "find * | sort")
** (find-subedsfile "subed-waveform.el")

** Part 2: prepare a subdirectory in /tmp/
** with a short video in .mp4 and its subtitles
** in .vtt:
** (find-eev2021video "0:00")
** (find-1stclassvideo-links "eev2021")
** (find-psne-1stclassvideo-links "eev2021")

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
rm -Rv /tmp/subtest/
mkdir /tmp/subtest/
cd /tmp/subtest/
# wget http://anggtwu.net/eev-videos/emacsconf2021.mp4
# wget http://anggtwu.net/eev-videos/emacsconf2021.vtt
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.mp4 .
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.vtt .

# (find-fline "/tmp/subtest/")
# (find-video "/tmp/subtest/emacsconf2021.mp4")
# (find-fline "/tmp/subtest/emacsconf2021.vtt")

** Part 3: require subed-waveform.
*
* (add-to-list 'load-path "~/usrc/subed/subed/")
* (require 'subed)
* (require 'subed-waveform)
* (require 'subed-vtt)
* (require 'svg)
*
** Test: check that some `locate-library's
** return files in ~/usrc/subed/subed/:
** (locate-library "subed.el")
** (locate-library "subed-vtt.el")
** (locate-library "subed-waveform.el")


#####
#
# waveform-test-2
# 2023aug13
#
#####

# «waveform-test-2» (to ".waveform-test-2")
# (find-es "ffmpeg" "waveform")

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
rm -Rv /tmp/subtest/
mkdir /tmp/subtest/
cd /tmp/subtest/
# wget http://anggtwu.net/eev-videos/emacsconf2021.mp4
# wget http://anggtwu.net/eev-videos/emacsconf2021.vtt
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.mp4 .
cp -v $S/http/anggtwu.net/eev-videos/emacsconf2021.vtt .

# Render the audio between 0:05 and 0:10 of the video as a single
# 400x100 gray waveform picture, o.png.
cd /tmp/subtest/
ffmpeg \
  -accurate_seek -ss 5 -to 10 -i emacsconf2021.mp4 \
  -loglevel 0 \
  -filter_complex volume=2.0,showwavespic=s=400x100:colors=gray \
  -frames:v 1 -c:v png -f image2 \
  o.png

# (find-fline "/tmp/subtest/o.png")


#####
#
# mpv-geometry
# 2022may02
#
#####

# «mpv-geometry» (to ".mpv-geometry")
# (find-fline "/sda5/videos/" "A_Tour_of_the_Acme_Editor-dP1xVpMPn8M.webm")

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
mpv -geometry 400x300+520+100 /sda5/videos/A_Tour_of_the_Acme_Editor-dP1xVpMPn8M.webm
mpv --geometry=400x300+520+100 /sda5/videos/A_Tour_of_the_Acme_Editor-dP1xVpMPn8M.webm

# (find-customizevariable 'subed-mpv-arguments)
# (find-evariable-links 'subed-mpv-arguments)
# (find-fline "~/.emacs.custom" "subed-mpv-arguments")


#####
#
# pysubs2
# 2022apr19
#
#####

# «pysubs2» (to ".pysubs2")

rm -Rv /tmp/edrx-pict2e/
mkdir /tmp/edrx-pict2e/
cd /tmp/edrx-pict2e/
yt-dlp --write-sub --write-auto-sub \
  --skip-download -o 2022-pict2e-lua \
  "http://www.youtube.com/watch?v=hiHsUhGVLGM"
wget http://angg.twu.net/eev-videos/2022-pict2e-lua.mp4
ls -lAF
mpv /tmp/edrx-pict2e/2022-pict2e-lua.mp4


#####
#
# gryms-code
# 2022apr19
#
#####

# «gryms-code» (to ".gryms-code")
# (find-yttranscript-links "2022pict2elua" "hiHsUhGVLGM")
# (find-efunction 'find-yttranscript-links)
# (find-fline "~/LOGS/2022apr19.emacs")
# https://bpa.st/OKIQ

* (python-mode)
* (eepitch-python)
* (eepitch-kill)
* (eepitch-python)
import youtube_transcript_downloader
url = "http://www.youtube.com/watch?v=hiHsUhGVLGM"
f = "find-2022pict2eluavideo"
# NOTE(review): get_transcript presumably returns a dict mapping timestamp
# strings to caption text; the code below only relies on .items() yielding
# (key, text) pairs of strings.
tr = youtube_transcript_downloader.get_transcript(url)
trits0 = tr.items()
# One line of the form  % (find-2022pict2eluavideo "KEY" "TEXT")  per entry.
trits1 = '\n'.join(('% (' + f + ' "' + key + '" "' + text + '")' for key, text in trits0))
print(trits1)

# Pair each entry with its successor, so that the successor's key becomes
# the stop time of the cue. The last entry has no successor, so no cue is
# emitted for it.
trits2 = tuple(trits0)
trits3 = zip(trits2, trits2[1:])
vtts = []
for (start,thistext),(stop,nexttext) in trits3:
    vtts.append(f"{start} --> {stop}\n{thistext}\n")

# Header metadata is "Kind: captions", without a trailing colon.
# The header ends in "\n" and is joined with "\n", which yields the blank
# line that separates it from the first cue.
# NOTE(review): the keys are written out unchanged as cue timings; confirm
# that they are in a timestamp format that WebVTT accepts.
header = "WEBVTT\nKind: captions\nLanguage: en-GB\n"
body = "\n".join((header, *vtts))
print(body)


#####
#
# Subtitles.lua
# 2022apr20
#
#####

# «Subtitles.lua» (to ".Subtitles.lua")
# (find-anggfile "LUA/Subtitles.lua")


#####
#
# aeneas
# 2022nov11
#
#####

# «aeneas» (to ".aeneas")
# (find-fline "~/LOGS/2022nov11.emacs" "<sachac> edrx: oh, you can use aeneas")
# https://www.readbeyond.it/aeneas/
# https://github.com/readbeyond/aeneas
# https://pypi.org/project/aeneas/1.4.0.0/
# https://www.youtube.com/watch?v=xP870sdyCXE
# https://media.emacsconf.org/2022/backstage/editing-captions.html


#####
#
# Fixing the "<>"s in my subtitles class
# 2023apr16
#
#####

# «angle-brackets» (to ".angle-brackets")
# (find-1stclassvideo-links "eev2021")
# (find-eev2021video "0:14")
# https://www.youtube.com/watch?v=qM0Luz78qGw
# (find-yttranscript-links "{c}" "qM0Luz78qGw")
# (find-importlib-links "youtube-transcript-downloader")
# https://github.com/t4skmanag3r/youtube_transcript_downloader/issues/1


#####
#
# waveforms
# 2023jun23
#
#####

# «waveforms» (to ".waveforms")
# https://mbork.pl/2023-06-19_Emacs_Subed_mode_can_now_display_waveforms
# https://github.com/sachac/waveform-el
# (code-c-d "waveform" "~/usrc/waveform-el/")
# (find-waveformfile "")
# (find-waveformfile "waveform.el")
# (load "~/usrc/waveform-el/waveform.el")

# (find-eevvideosfile "" "emacsconf2021")
emacsconf2021.mp4

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)


https://sachachua.com/blog/2022/10/subed-el-word-level-timing-improvements/
https://sachachua.com/dotemacs/index.html

(defun my-caption-download-srv2 (id)
  "Download the English srv2 subtitles of the YouTube video ID into /tmp.
ID is read from the minibuffer when called interactively. If it
contains \"v=...\", only the part after \"v=\" and before the next
\"&\" is kept, so a full watch URL is accepted as well as a bare id.
Runs yt-dlp synchronously with /tmp as its working directory; its
exit status is not checked. The result of `my-latest-file' (defined
elsewhere; presumably the newest file in /tmp matching \"\\.srv2\\'\")
is then passed to `subed-word-data-load-from-file'."
  (interactive "MID: ")
  (require 'subed-word-data)
  (when (string-match "v=\\([^&]+\\)" id) (setq id (match-string 1 id)))
  (let ((default-directory "/tmp"))
    (call-process "yt-dlp" nil nil nil "--write-auto-sub" "--write-sub" "--no-warnings" "--sub-lang" "en" "--skip-download" "--sub-format" "srv2"
                  (concat "https://youtu.be/" id))
    (subed-word-data-load-from-file (my-latest-file "/tmp" "\\.srv2\\'"))))

(defun my-caption-fix-common-errors (data)
  "Apply the edits in `my-subed-common-edits' to the texts in DATA.
DATA is a sequence of alists that have a `text' entry, and
`my-subed-common-edits' is defined elsewhere. Each edit is either a
string or a list of strings. For every element of DATA and every
edit, the first whole-word match of any non-empty string of the edit
in the element's text is replaced, literally, by the first string of
the edit, and the result is stored back in the `text' entry.
Returns DATA."
  (mapc
   (lambda (o)
     (mapc (lambda (e)
             (when (string-match (concat "\\<" (regexp-opt (if (listp e) (seq-remove (lambda (s) (string= "" s)) e) (list e))) "\\>")
                                 (alist-get 'text o))
               (map-put! o 'text (replace-match (car (if (listp e) e (list e))) t t (alist-get 'text o)))))
           my-subed-common-edits))
   data))


https://news.ycombinator.com/item?id=34105063
BBC Subtitle Guidelines (bbc.co.uk)

https://sachachua.com/dotemacs/
https://sachachua.com/dotemacs/#subed
https://melpa.org/#/opensub

# (find-eevvideosfile "2022jul10-apresentacao-C2.vtt")

http://mbork.pl/2022-09-05_Comments_in_srt_files

<sachac> edrx: set subed-auto-find-video to nil
# (find-fline "~/LOGS/2023jan03.emacs" "emacsconf-subed")

https://sachachua.com/blog/2023/12/using-subed-record-in-emacs-to-edit-audio-and-clean-up-oopses/
https://thesquareplanet.com/blog/ai-captioning/


# Local Variables:
# coding: utf-8-unix
# End: