From 9261a70a564034badf2c2b8c6859c46b295adb3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Vukovi=C4=87?= Date: Mon, 1 Jul 2024 11:51:10 +0200 Subject: [PATCH] make non-ja audio work without configuration (#1152) * add audio source for wiktionary format filenames on commons * allow region codes * fix files being saved in anki as mp3 * wip * refactor _getAvailableAudioSourceTypes * update docs * wording * single line list * fix no anki audio --- docs/audio.md | 49 +++++++++++++++++------ ext/data/schemas/options-schema.json | 8 +--- ext/js/display/display-audio.js | 30 ++++++++++---- ext/js/media/audio-downloader.js | 28 ++++++++++++- ext/js/pages/settings/audio-controller.js | 34 +++++++++++----- 5 files changed, 111 insertions(+), 38 deletions(-) diff --git a/docs/audio.md b/docs/audio.md index dc71b0dc5a..b40e610325 100644 --- a/docs/audio.md +++ b/docs/audio.md @@ -1,18 +1,41 @@ -### Audio Configuration +# Audio -Audio playback of terms' pronunciation can be configured in the `Audio` section of the _Settings_ page. +## Default audio sources -For Japanese terms, Yomitan provides a preconfigured audio source. More audio sources can be added in `Audio` > `Configure audio playback sources`. They will be checked for pronunciations in the order they are listed. +After looking up a term, you can click on the _speaker_ button to hear the term's pronunciation. When searching for audio, multiple audio sources are checked until the first source with audio for the term is found. If no audio is found, you will hear a short click instead. Right-clicking the button allows choosing the source manually. -For other languages, there are multiple sources of audio available. +#### Japanese -- The [Yomichan Forvo Server](https://ankiweb.net/shared/info/580654285) Anki add-on is one way to get pronunciation audio for other languages using the [Forvo](https://forvo.com/) service. 
- - It fetches native voices from Forvo, at the cost of a slight delay and with the tradeoff of a lower selection and quality of audio. - - After installing it, add a `Custom URL (JSON)` audio source with the URL `http://localhost:8770?term={term}&reading={reading}&language=en` (replace `en` with the desired language's ISO code). -- Yomitan can use your browser's inbuilt text-to-speech (TTS) engine. - - To enable this, just add a new playback source with the `Text-to-speech` type and choose your desired voice. This is the simplest way to get pronunciation audio, though the voices are supplied by your browser and may not support all languages. - - For instance, [Microsoft Edge](https://www.microsoft.com/en-us/edge) offers a wide selection of free Azure natural voices for a variety of languages. Edge provides over 300 voices, compared to around 25 in Google Chrome (see [here](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt) for a list of supported languages). - - ⚠️ Note that the TTS voices cannot be sent to Anki; this is a [limitation of the browser SpeechSynthesis API](https://github.com/themoeway/yomitan/issues/864). - - In addition, TTS audio can be inaccurate for languages with complex pronunciation such as Japanese, where words can have multiple possible readings and pitch accents. +The default audio sources for Japanese are: -With at least one working audio source in place, you can click on the _speaker_ button to hear the term's pronunciation. When searching for audio, the sources are checked in order until the first valid source is found. Right-clicking the _speaker_ button allows choosing the source manually. If no audio is found, you will hear a short click instead. 
+- JapanesePod101 +- Jisho.org + +#### Other languages + +The default audio sources for other languages are from Wikimedia Commons: + +- Lingua Libre +- Wiktionary + +While Commons has audio for many languages, some of them have limited coverage, and you may want to add additional sources, as described below. + +## Configuration + +Audio sources can be configured in `Settings` > `Audio` > `Configure audio playback sources` to reorder them or add new ones. Besides the four sources mentioned above, you can also use your browser's inbuilt text-to-speech (TTS) engine or add a custom URL source: + +### Text-to-speech (TTS) + +To enable this, just add a new playback source with the `Text-to-speech` type and choose your desired voice. This is the simplest way to get pronunciation audio, though there are a few points to keep in mind: + +- TTS voices vary between browsers and so might not support all languages. For instance, [Microsoft Edge](https://www.microsoft.com/en-us/edge) offers a wide selection of free Azure natural voices for a variety of languages. Edge provides over 300 voices, compared to around 25 in Google Chrome (see [here](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt) for a list of supported languages). +- TTS audio can be inaccurate for languages with complex pronunciation such as Japanese, where words can have multiple possible readings and pitch accents. +- ⚠️ Note that the TTS voices cannot be sent to Anki; this is a [limitation of the browser SpeechSynthesis API](https://github.com/themoeway/yomitan/issues/864). + +### Custom URL + +You can add a custom URL from which audio will be fetched. One use case for this is fetching audio from Forvo: + +#### Yomichan Forvo Server + +[Forvo](https://forvo.com/) is currently the largest online pronunciation database, with native pronunciation audio for various languages. 
One way to get Forvo audio in Yomitan is via the [Yomichan Forvo Server](https://ankiweb.net/shared/info/580654285) Anki add-on. It fetches from Forvo, at the cost of a slight delay. After installing it in Anki, add a `Custom URL (JSON)` audio source with the URL `http://localhost:8770?term={term}&reading={reading}&language=en` (replacing `en` with the desired language's ISO code). diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json index 4bd7625a23..2fb1c9c383 100644 --- a/ext/data/schemas/options-schema.json +++ b/ext/data/schemas/options-schema.json @@ -419,13 +419,7 @@ } } }, - "default": [ - { - "type": "jpod101", - "url": "", - "voice": "" - } - ] + "default": [] } } }, diff --git a/ext/js/display/display-audio.js b/ext/js/display/display-audio.js index bb30c9445e..4fcabf06a3 100644 --- a/ext/js/display/display-audio.js +++ b/ext/js/display/display-audio.js @@ -166,16 +166,15 @@ export class DisplayAudio { * @param {import('display').EventArgument<'optionsUpdated'>} details */ _onOptionsUpdated({options}) { - const {enabled, autoPlay, volume, sources} = options.audio; + const { + general: {language}, + audio: {enabled, autoPlay, volume, sources}, + } = options; this._autoPlay = enabled && autoPlay; this._playbackVolume = Number.isFinite(volume) ? Math.max(0, Math.min(1, volume / 100)) : 1; /** @type {Set} */ - const requiredAudioSources = new Set([ - 'jpod101', - 'jpod101-alternate', - 'jisho', - ]); + const requiredAudioSources = this._getRequiredAudioSources(language); /** @type {Map} */ const nameMap = new Map(); this._audioSources.length = 0; @@ -188,11 +187,28 @@ export class DisplayAudio { } const data = document.documentElement.dataset; - data.audioEnabled = `${enabled && sources.length > 0}`; + data.audioEnabled = enabled.toString(); this._cache.clear(); } + /** + * @param {string} language + * @returns {Set} + */ + _getRequiredAudioSources(language) { + return language === 'ja' ? 
+ new Set([ + 'jpod101', + 'jpod101-alternate', + 'jisho', + ]) : + new Set([ + 'lingua-libre', + 'wiktionary', + ]); + } + /** */ _onContentClear() { this._entriesToken = {}; diff --git a/ext/js/media/audio-downloader.js b/ext/js/media/audio-downloader.js index 99ca1dfd06..17911c3e4b 100644 --- a/ext/js/media/audio-downloader.js +++ b/ext/js/media/audio-downloader.js @@ -90,7 +90,8 @@ export class AudioDownloader { */ async downloadTermAudio(sources, preferredAudioIndex, term, reading, idleTimeout, languageSummary) { const errors = []; - for (const source of sources) { + const requiredAudioSources = this._getRequiredAudioSources(languageSummary.iso, sources); + for (const source of [...sources, ...requiredAudioSources]) { let infoList = await this.getTermAudioInfoList(source, term, reading, languageSummary); if (typeof preferredAudioIndex === 'number') { infoList = (preferredAudioIndex >= 0 && preferredAudioIndex < infoList.length ? [infoList[preferredAudioIndex]] : []); @@ -115,6 +116,31 @@ export class AudioDownloader { // Private + /** + * @param {string} language + * @param {import('audio').AudioSourceInfo[]} sources + * @returns {import('audio').AudioSourceInfo[]} + */ + _getRequiredAudioSources(language, sources) { + /** @type {Set} */ + const requiredSources = language === 'ja' ? 
+ new Set([ + 'jpod101', + 'jpod101-alternate', + 'jisho', + ]) : + new Set([ + 'lingua-libre', + 'wiktionary', + ]); + + for (const {type} of sources) { + requiredSources.delete(type); + } + + return [...requiredSources].map((type) => ({type, url: '', voice: ''})); + } + /** * @param {string} url * @param {string} base diff --git a/ext/js/pages/settings/audio-controller.js b/ext/js/pages/settings/audio-controller.js index b89ae2ba00..defef16e7f 100644 --- a/ext/js/pages/settings/audio-controller.js +++ b/ext/js/pages/settings/audio-controller.js @@ -47,6 +47,8 @@ export class AudioController extends EventDispatcher { this._voiceTestTextInput = querySelectorNotNull(document, '#text-to-speech-voice-test-text'); /** @type {import('audio-controller').VoiceInfo[]} */ this._voices = []; + /** @type {string} */ + this._language = 'ja'; } /** @type {import('./settings-controller.js').SettingsController} */ @@ -121,12 +123,18 @@ export class AudioController extends EventDispatcher { * @param {import('settings-controller').EventArgument<'optionsChanged'>} details */ _onOptionsChanged({options}) { + const { + general: {language}, + audio: {sources}, + } = options; + + this._language = language; + for (const entry of this._audioSourceEntries) { entry.cleanup(); } this._audioSourceEntries = []; - const {sources} = options.audio; for (let i = 0, ii = sources.length; i < ii; ++i) { this._createAudioSourceEntry(i, sources[i]); } @@ -216,15 +224,7 @@ export class AudioController extends EventDispatcher { * @returns {import('settings').AudioSourceType} */ _getUnusedAudioSourceType() { - /** @type {import('settings').AudioSourceType[]} */ - const typesAvailable = [ - 'jpod101', - 'jpod101-alternate', - 'jisho', - 'lingua-libre', - 'wiktionary', - 'custom', - ]; + const typesAvailable = this._getAvailableAudioSourceTypes(); for (const type of typesAvailable) { if (!this._audioSourceEntries.some((entry) => entry.type === type)) { return type; @@ -233,6 +233,20 @@ export class 
AudioController extends EventDispatcher { return typesAvailable[0]; } + /** + * @returns {import('settings').AudioSourceType[]} + */ + _getAvailableAudioSourceTypes() { + /** @type {import('settings').AudioSourceType[]} */ + const generalAudioSources = ['lingua-libre', 'wiktionary', 'text-to-speech', 'custom']; + if (this._language === 'ja') { + /** @type {import('settings').AudioSourceType[]} */ + const japaneseAudioSources = ['jpod101', 'jpod101-alternate', 'jisho']; + return [...japaneseAudioSources, ...generalAudioSources]; + } + return generalAudioSources; + } + /** */ async _addAudioSource() { const type = this._getUnusedAudioSourceType();