Encoder improvements (#127)

* Encoder improvements * preserve aspect ratio using -2 * add comment * restore lost comment * update comment * format code use h.unpack() to avoid crashing mpvacious * add comment * add function quality_to_crf_avif() * extract function * extract function * remove duplicate numbers * add function quality_to_crf_jpeg() * Disable loudnorm for compatibility; user filters fix; refactor quality and scale functions * Mention opus_container in README --------- Co-authored-by: Ren Tatsumoto <[email protected]>
Ajatt-Tools · Jun 22, 2024 · aa86597 · aa86597
1 parent f27211b
commit aa86597
Show file tree

Hide file tree

Showing 5 changed files with 559 additions and 197 deletions.
diff --git a/.github/RELEASE/subs2srs.conf b/.github/RELEASE/subs2srs.conf
@@ -106,7 +106,7 @@ append_media=yes
 # Remove text in brackets before substituting %n into tag
 tag_nuke_brackets=yes
 
-# Remove text in brackets before substituting %n into tag
+# Remove text in parentheses before substituting %n into tag
 tag_nuke_parentheses=no
 
 # Remove the episode number before substituting %n into tag
@@ -182,6 +182,10 @@ audio_template=[sound:%s]
 # If enabled, generates animated snapshots (something like GIFs) instead of static snapshots.
 animated_snapshot_enabled=no
 
+# Animated snapshot format. Like "snapshot_format" but for animated images. Can be either avif or webp.
+animated_snapshot_format=avif
+#animated_snapshot_format=webp
+
 # Number of frames per second, a value between 0 and 30 (30 included)
 # Higher values will increase both quality and file size, lower values will do the opposite
 animated_snapshot_fps=10
@@ -201,11 +205,23 @@ animated_snapshot_quality=5
 
 # Audio format.
 # Opus is the recommended format.
-# It may be required to use a different format when Opus is not supported.
-# This is the case on certain computers or devices which are running proprietary operating systems.
 audio_format=opus
 #audio_format=mp3
-#audio_format=caf
+
+# Container for opus files.
+# It may be required to use a different container for Opus.
+# This is the case on certain computers or devices
+# which are running proprietary operating systems, e.g. AnkiMobile. Using them is discouraged.
+# ・ Ogg/Opus play everywhere except AnkiWeb in Safari and AnkiMobile.
+# ・ M4A (iOS 17.2 and probably even earlier) and WEBM (since iOS 17.4) play everywhere.
+# ・ Opus in CAF can be used with older iOS. CAF plays only on Anki Desktop, Safari and AnkiMobile.
+# ・ (iOS Lockdown Mode disables Opus support completely,
+#    though you may try to add an exception for AnkiMobile.)
+opus_container=ogg
+#opus_container=opus
+#opus_container=m4a
+#opus_container=webm
+#opus_container=caf
 
 # Sane values are 16k-32k for opus, 64k-128k for mp3.
 audio_bitrate=24k
@@ -275,18 +291,36 @@ secondary_sub_area=0.15
 # Default binding to cycle this value: Ctrl+v.
 secondary_sub_visibility=auto
 
+# Perform two-pass loudness normalization.
+# Parameter explanation can be found e.g. at:
+# https://auphonic.com/blog/2013/01/07/loudness-targets-mobile-audio-podcasts-radio-tv/
+# https://auphonic.com/blog/2019/08/19/dynamic-range-processing/
+# MAKE SURE TO REMOVE loudnorm FROM CUSTOM ARGS BEFORE ENABLING.
+loudnorm=no
+loudnorm_target=-16
+loudnorm_range=11
+loudnorm_peak=-1.5
+
 ##
 ## Custom audio encoding arguments
 ## These arguments are added to the command line.
 ## `mpv` and `ffmpeg` accept slightly different parameters.
 ## Feel free to experiment for yourself, but be careful or media creation might stop working.
 ##
 
+# loudnorm IN CUSTOM ARGS IS LEFT FOR BACKWARD COMPATIBILITY.
+# MAKE SURE TO REMOVE ALL MENTIONS OF loudnorm FROM CUSTOM ARGS
+# (E.G. SET TO EMPTY STRINGS) BEFORE ENABLING TWO-PASS loudnorm.
+# ENABLING loudnorm BOTH THROUGH THE SWITCH AND THROUGH CUSTOM ARGS
+# CAN LEAD TO UNPREDICTABLE RESULTS.
+
 # Ffmpeg
-ffmpeg_audio_args=-af loudnorm=I=-16:TP=-1.5:LRA=11
+ffmpeg_audio_args=-af loudnorm=I=-16:TP=-1.5:LRA=11:dual_mono=true
+#ffmpeg_audio_args=
 #ffmpeg_audio_args=-af silenceremove=1:0:-50dB
 
 # mpv
 # mpv accepts each filter as a separate argument, e.g. --af-append=1 --af-append=2
-mpv_audio_args=--af-append=loudnorm=I=-16:TP=-1.5:LRA=11
+mpv_audio_args=--af-append=loudnorm=I=-16:TP=-1.5:LRA=11:dual_mono=true
+#mpv_audio_args=
 #mpv_audio_args=--af-append=silenceremove=1:0:-50dB
diff --git a/README.md b/README.md
@@ -198,11 +198,12 @@ and `avif` or `webp` for images,
 as they greatly reduce the size of the generated files.
 
 If you still use AnkiMobile (the [proprietary](https://www.gnu.org/proprietary/) Anki app),
-set `audio_format` to [caf](https://en.wikipedia.org/wiki/Core_Audio_Format) for compatibility.
-The resulting files will use `Opus` as the coding format and Apple's Core Audio
-format as the container format, but will still have the `.ogg` extension to make
-it easier to open in media players because the `.caf` extension is not commonly
-recognized.
+set `opus_container` to `m4a` or `webm`. It'll allow iOS to play Opus files, while still maintaining
+compatibility with non-Apple devices. For really old iOS devices, set `opus_container` to
+[`caf`](https://en.wikipedia.org/wiki/Core_Audio_Format). CAF plays only on Anki Desktop,
+AnkiWeb in Safari and AnkiMobile, and is really not recommended. (Please note that
+[Lockdown Mode](https://support.apple.com/en-us/105120) completely disables Opus and AVIF support,
+though you may try to add an exception for AnkiMobile.)
 
 If no matter what mpvacious fails to create audio clips and/or snapshots,
 change `use_ffmpeg` to `yes`.

diff --git a/cfg_mgr.lua b/cfg_mgr.lua
@@ -11,6 +11,13 @@ local h = require('helpers')
 
 local min_side_px = 42
 local max_side_px = 640
+local default_height_px = 200
+
+-- This constant should be used in place of width and/or height in the config file.
+-- It tells the encoder to preserve aspect ratio when downscaling snapshots.
+-- The user almost always wants to set either width or height to this value.
+-- Note: If set to -1, encoding will fail with the "height/width not divisible by 2" error.
+local preserve_aspect_ratio = -2
 
 local self = {
     config = nil,
@@ -21,12 +28,23 @@ local self = {
 local default_profile_filename = 'subs2srs'
 local profiles_filename = 'subs2srs_profiles'
 
+local function set_file_extension_for_opus()
+    -- Default to OGG; switch only if opus_container names one of the supported containers.
+    -- https://en.wikipedia.org/wiki/Core_Audio_Format
+    self.config.audio_extension = '.ogg'
+    for _, extension in ipairs({ 'opus', 'm4a', 'webm', 'caf' }) do
+        if extension == self.config.opus_container then
+            self.config.audio_extension = '.' .. self.config.opus_container
+            break
+        end
+    end
+end
+
 local function set_audio_format()
-    if self.config.audio_format == 'opus' or self.config.audio_format == 'caf' then
+    if self.config.audio_format == 'opus' then
         -- https://opus-codec.org/
-        -- https://en.wikipedia.org/wiki/Core_Audio_Format
         self.config.audio_codec = 'libopus'
-        self.config.audio_extension = '.ogg'
+        set_file_extension_for_opus()
     else
         self.config.audio_codec = 'libmp3lame'
         self.config.audio_extension = '.mp3'
@@ -44,20 +62,27 @@ local function set_video_format()
         self.config.snapshot_extension = '.jpg'
         self.config.snapshot_codec = 'mjpeg'
     end
+
     -- Animated webp images can only have .webp extension.
-    -- The user has no choice on this.
-    self.config.animated_snapshot_extension = '.webp'
+    -- The user has no choice on this. Same logic for avif.
+    if self.config.animated_snapshot_format == 'avif' then
+        self.config.animated_snapshot_extension = '.avif'
+        self.config.animated_snapshot_codec = 'libaom-av1'
+    else
+        self.config.animated_snapshot_extension = '.webp'
+        self.config.animated_snapshot_codec = 'libwebp'
+    end
 end
 
 local function ensure_in_range(dimension)
-    self.config[dimension] = self.config[dimension] < min_side_px and -2 or self.config[dimension]
+    self.config[dimension] = self.config[dimension] < min_side_px and preserve_aspect_ratio or self.config[dimension]
     self.config[dimension] = self.config[dimension] > max_side_px and max_side_px or self.config[dimension]
 end
 
 local function conditionally_set_defaults(width, height, quality)
     if self.config[width] < 1 and self.config[height] < 1 then
-        self.config[width] = -2
-        self.config[height] = 200
+        self.config[width] = preserve_aspect_ratio
+        self.config[height] = default_height_px
     end
     if self.config[quality] < 0 or self.config[quality] > 100 then
         self.config[quality] = 15
@@ -171,4 +196,6 @@ return {
     reload_from_disk = reload_from_disk,
     init = init,
     next_profile = next_profile,
+    default_height_px = default_height_px,
+    preserve_aspect_ratio = preserve_aspect_ratio,
 }