diff --git a/packages/tests/fixtures/transcription/README.md b/packages/tests/fixtures/transcription/README.md deleted file mode 100644 index c72967581b56..000000000000 --- a/packages/tests/fixtures/transcription/README.md +++ /dev/null @@ -1,6 +0,0 @@ -🇫🇷 DRANE Occitanie - Communiquer lors d'une classe transplantée -[./communiquer-lors-dune-classe-transplantee.mp4](videos/communiquer-lors-dune-classe-transplantee.mp4) -> https://podeduc.apps.education.fr/numerique-educatif/video/21893-communiquer-lors-dune-classe-transplantee/ -> -> CC BY-NC-SA 4.0 Deed -> Attribution-NonCommercial-ShareAlike 4.0 International diff --git a/packages/tests/fixtures/transcription/videos/README.md b/packages/tests/fixtures/transcription/videos/README.md new file mode 100644 index 000000000000..b892a7553046 --- /dev/null +++ b/packages/tests/fixtures/transcription/videos/README.md @@ -0,0 +1,16 @@ +🇫🇷 DRANE Occitanie - Communiquer lors d'une classe transplantée +[./communiquer-lors-dune-classe-transplantee.mp4](communiquer-lors-dune-classe-transplantee.mp4) +> https://podeduc.apps.education.fr/numerique-educatif/video/21893-communiquer-lors-dune-classe-transplantee/ +> +> CC BY-NC-SA 4.0 Deed +> Attribution-NonCommercial-ShareAlike 4.0 International + +🇫🇷 [Accompagner la victime d'une dérive sectaire ou d'une emprise mentale](https://www.fun-mooc.fr/fr/cours/accompagner-la-victime-de-derive-sectaire/) +> Centre Contre les Manipulations Mentales (CCMM) +> [CC BY-NC-ND 4.0 Deed](https://creativecommons.org/licenses/by-nc-nd/4.0/) +> Attribution-NonCommercial-NoDerivs 4.0 International + +🇺🇸 [The Last Man On Earth (1964)](https://archive.org/details/TheLastManOnEarthHD) +> PDM 1.0 Deed +> Public Domain Mark 1.0 Universal +> https://creativecommons.org/publicdomain/mark/1.0/ diff --git a/packages/tests/fixtures/transcription/videos/the_last_man_on_earth.mp4 b/packages/tests/fixtures/transcription/videos/the_last_man_on_earth.mp4 new file mode 100644 index 000000000000..45ef4325e354 Binary files /dev/null and b/packages/tests/fixtures/transcription/videos/the_last_man_on_earth.mp4 differ diff --git a/packages/tests/fixtures/transcription/videos/the_last_man_on_earth.txt b/packages/tests/fixtures/transcription/videos/the_last_man_on_earth.txt new file mode 100644 index 000000000000..2a8ed1a11347 --- /dev/null +++ b/packages/tests/fixtures/transcription/videos/the_last_man_on_earth.txt @@ -0,0 +1,5 @@ +December, 1965. +Is that all it has been since +I inherited the world? +Only three years. +It seems like a hundred million. diff --git a/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts index f00c21f5b138..c000e8f5c373 100644 --- a/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts +++ b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts @@ -1,4 +1,4 @@ -/* eslint-disable @typescript-eslint/no-unused-expressions */ +/* eslint-disable @typescript-eslint/no-unused-expressions, max-len */ import { expect, config } from 'chai' import { createLogger } from 'winston' import { join } from 'path' @@ -16,7 +16,7 @@ config.truncateThreshold = 0 describe('Open AI Whisper transcriber', function () { const transcriptDirectory = join(root(), 'test-transcript') - const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4') + const shortVideoPath = buildAbsoluteFixturePath('transcription/videos/the_last_man_on_earth.mp4') const frVideoPath = buildAbsoluteFixturePath('transcription/videos/derive_sectaire.mp4') const referenceTranscriptFile = new TranscriptFile({ path: buildAbsoluteFixturePath('transcription/videos/derive_sectaire.txt'), @@ -43,7 +43,7 @@ describe('Open AI Whisper transcriber', function () { it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () { const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en' }) expect(await transcript.equals(new TranscriptFile({ - path: join(transcriptDirectory, 'video_short.vtt'), + path: join(transcriptDirectory, 'the_last_man_on_earth.vtt'), language: 'en', format: 'vtt' }))).to.be.true @@ -51,8 +51,8 @@ describe('Open AI Whisper transcriber', function () { expect(await transcript.read()).to.equals( `WEBVTT -00:00.000 --> 00:02.000 -You +00:00.000 --> 00:13.000 +December 1965, is that all it has been since I inherited the world only three years, seems like a hundred million. ` ) @@ -61,15 +61,15 @@ You it('May produce a transcript file in the `srt` format', async function () { const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt' }) expect(await transcript.equals(new TranscriptFile({ - path: join(transcriptDirectory, 'video_short.srt'), + path: join(transcriptDirectory, 'the_last_man_on_earth.srt'), language: 'en', format: 'srt' }))).to.be.true expect(await transcript.read()).to.equal( `1 -00:00:00,000 --> 00:00:02,000 -You +00:00:00,000 --> 00:00:13,000 +December 1965, is that all it has been since I inherited the world only three years, seems like a hundred million. ` ) @@ -78,13 +78,14 @@ You it('May produce a transcript file in the `txt` format', async function () { const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt' }) expect(await transcript.equals(new TranscriptFile({ - path: join(transcriptDirectory, 'video_short.txt'), + path: join(transcriptDirectory, 'the_last_man_on_earth.txt'), language: 'en', format: 'txt' }))).to.be.true - expect(await transcript.read()).to.equal(`You -`) + expect(await transcript.read()).to.equal(`December 1965, is that all it has been since I inherited the world only three years, seems like a hundred million. +` + ) }) it('May transcribe a media file using a local PyTorch model', async function () { diff --git a/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts b/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts index ad6dcfc03b9d..467a65391d60 100644 --- a/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts +++ b/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts @@ -18,13 +18,8 @@ config.truncateThreshold = 0 describe('Linto timestamped Whisper transcriber', function () { const transcriptDirectory = join(root(), 'test-transcript') - const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4') + const shortVideoPath = buildAbsoluteFixturePath('transcription/videos/the_last_man_on_earth.mp4') const frVideoPath = buildAbsoluteFixturePath('transcription/videos/derive_sectaire.mp4') - const referenceTranscriptFile = new TranscriptFile({ - path: buildAbsoluteFixturePath('transcription/videos/derive_sectaire.txt'), - language: 'fr', - format: 'txt' - }) const transcriber = new WhisperTimestampedTranscriber( { name: 'whisper-timestamped', @@ -45,7 +40,7 @@ describe('Linto timestamped Whisper transcriber', function () { it('Should transcribe a media file and produce a transcript file in `vtt` with a ms precision', async function () { const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en' }) expect(await transcript.equals(new TranscriptFile({ - path: join(transcriptDirectory, 'video_short.vtt'), + path: join(transcriptDirectory, 'the_last_man_on_earth.vtt'), language: 'en', format: 'vtt' }))).to.be.true @@ -53,8 +48,14 @@ describe('Linto timestamped Whisper transcriber', function () { expect(await transcript.read()).to.equals( `WEBVTT -00:02.480 --> 00:02.500 -you +00:00.460 --> 00:02.080 +December 1965. + +00:03.700 --> 00:08.800 +Is that all it has been since I inherited the world only three years? + +00:10.420 --> 00:11.900 +Seems like a hundred million. ` ) @@ -63,15 +64,23 @@ you it('May produce a transcript file in the `srt` format with a ms precision', async function () { const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt' }) expect(await transcript.equals(new TranscriptFile({ - path: join(transcriptDirectory, 'video_short.srt'), + path: join(transcriptDirectory, 'the_last_man_on_earth.srt'), language: 'en', format: 'srt' }))).to.be.true expect(await transcript.read()).to.equals( `1 -00:00:02,480 --> 00:00:02,500 -you +00:00:00,460 --> 00:00:02,080 +December 1965. + +2 +00:00:03,700 --> 00:00:08,800 +Is that all it has been since I inherited the world only three years? + +3 +00:00:10,420 --> 00:00:11,900 +Seems like a hundred million. ` ) @@ -80,12 +89,14 @@ you it('May produce a transcript file in `txt` format', async function () { const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt' }) expect(await transcript.equals(new TranscriptFile({ - path: join(transcriptDirectory, 'video_short.txt'), + path: join(transcriptDirectory, 'the_last_man_on_earth.txt'), language: 'en', format: 'txt' }))).to.be.true - expect(await transcript.read()).to.equals(`you + expect(await transcript.read()).to.equals(`December 1965. +Is that all it has been since I inherited the world only three years? +Seems like a hundred million. `) }) diff --git a/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts b/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts index 4dbefeeafef2..0ea2607e653e 100644 --- a/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts +++ b/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts @@ -16,7 +16,7 @@ config.truncateThreshold = 0 describe('Whisper CTranslate2 transcriber', function () { const transcriptDirectory = join(root(), 'test-transcript') - const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4') + const shortVideoPath = buildAbsoluteFixturePath('transcription/videos/the_last_man_on_earth.mp4') const frVideoPath = buildAbsoluteFixturePath('transcription/videos/derive_sectaire.mp4') const transcriber = new Ctranslate2Transcriber( { @@ -37,12 +37,12 @@ describe('Whisper CTranslate2 transcriber', function () { it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () { const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en' }) - expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.vtt'), language: 'en' }))).to.be.true + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'the_last_man_on_earth.vtt'), language: 'en' }))).to.be.true expect(await readFile(transcript.path, 'utf8')).to.equal( `WEBVTT -00:00.000 --> 00:02.000 -You +00:00.000 --> 00:12.000 +December 1965, is that all it has been since I inherited the world only three years, seems like a hundred million. ` ) @@ -51,15 +51,15 @@ You it('May produce a transcript file in the `srt` format', async function () { const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt' }) expect(await transcript.equals(new TranscriptFile({ - path: join(transcriptDirectory, 'video_short.srt'), + path: join(transcriptDirectory, 'the_last_man_on_earth.srt'), format: 'srt', language: 'en' }))).to.be.true expect(await readFile(transcript.path, 'utf8')).to.equal( `1 -00:00:00,000 --> 00:00:02,000 -You +00:00:00,000 --> 00:00:12,000 +December 1965, is that all it has been since I inherited the world only three years, seems like a hundred million. ` ) @@ -68,12 +68,12 @@ You it('May produce a transcript file in the `txt` format', async function () { const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt' }) expect(await transcript.equals(new TranscriptFile({ - path: join(transcriptDirectory, 'video_short.txt'), + path: join(transcriptDirectory, 'the_last_man_on_earth.txt'), format: 'txt', language: 'en' }))).to.be.true - expect(await transcript.read()).to.equal(`You + expect(await transcript.read()).to.equal(`December 1965, is that all it has been since I inherited the world only three years, seems like a hundred million. `) }) @@ -86,12 +86,12 @@ You format: 'txt' }) expect(await transcript.equals(new TranscriptFile({ - path: join(transcriptDirectory, 'video_short.txt'), + path: join(transcriptDirectory, 'the_last_man_on_earth.txt'), format: 'txt', language: 'en' }))).to.be.true - expect(await transcript.read()).to.equal(`You + expect(await transcript.read()).to.equal(`December 1965, is that all it has been since I inherited the world only three years, seems like a hundred million. `) })