Skip to content

Commit

Permalink
chore(test): implement formats test of all implementations
Browse files Browse the repository at this point in the history
Also compare the results of the other implementations to the reference implementation
  • Loading branch information
lutangar committed Apr 23, 2024
1 parent c6f230b commit 1d27252
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 112 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,35 +10,29 @@ config.truncateThreshold = 0

describe('Open AI Whisper transcriber', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const expectedVttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')

const transcriber = new OpenaiTranscriber(
{
name: 'openai-whisper',
requirements: [],
type: 'binary',
binary: 'whisper',
supportedModelFormats: [ 'PyTorch' ]
},
createLogger(),
transcriptDirectory
)

before(async function () {
await mkdir(transcriptDirectory, { recursive: true })
})

it('Should transcribe a media file', async function () {
const transcriber = new OpenaiTranscriber(
{
name: 'openai-whisper',
requirements: [],
language: '',
type: 'binary',
license: '',
supportedModelFormats: [ 'PyTorch' ]
},
createLogger(),
transcriptDirectory
)
const transcript = await transcriber.transcribe(
buildAbsoluteFixturePath('video_short.mp4'),
{ name: 'tiny' },
'fr',
'vtt'
)

it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
const transcript = await transcriber.transcribe(shortVideoPath)
expect(transcript).to.deep.equals({
path: expectedVttTranscriptPath,
language: 'fr',
path: join(transcriptDirectory, 'video_short.vtt'),
language: 'en',
format: 'vtt'
})

Expand All @@ -54,6 +48,39 @@ You
)
})

it('May produce a transcript file in the `srt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
expect(transcript).to.deep.equals({
path: join(transcriptDirectory, 'video_short.srt'),
language: 'en',
format: 'srt'
})

// eslint-disable-next-line @typescript-eslint/no-unused-expressions
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
expect(await readFile(transcript.path, 'utf8')).to.equal(
`1
00:00:00,000 --> 00:00:02,000
You
`
)
})

it('May produce a transcript file in the `txt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')
expect(transcript).to.deep.equals({
path: join(transcriptDirectory, 'video_short.txt'),
language: 'en',
format: 'txt'
})

// eslint-disable-next-line @typescript-eslint/no-unused-expressions
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
expect(await readFile(transcript.path, 'utf8')).to.equal(`You
`)
})

after(async function () {
await rm(transcriptDirectory, { recursive: true, force: true })
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,56 +4,116 @@ import { expect, config } from 'chai'
import { existsSync } from 'node:fs'
import { mkdir, readFile, rm } from 'node:fs/promises'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { OpenaiTranscriber } from '@peertube/peertube-transcription'
import { OpenaiTranscriber, WhisperTimestampedTranscriber } from '@peertube/peertube-transcription'

config.truncateThreshold = 0

describe('Linto timestamped Whisper transcriber', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const expectedVttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
const transcriber = new WhisperTimestampedTranscriber(
{
name: 'whisper-timestamped',
requirements: [],
type: 'binary',
binary: 'whisper_timestamped',
supportedModelFormats: [ 'PyTorch' ]
},
createLogger(),
transcriptDirectory
)

before(async function () {
await mkdir(transcriptDirectory, { recursive: true })
})

it('Should transcribe a media file', async function () {
const transcriber = new OpenaiTranscriber(
{
name: 'timestamped-whisper',
requirements: [],
language: '',
type: 'binary',
license: '',
supportedModelFormats: [ 'PyTorch' ]
},
createLogger(),
transcriptDirectory
)
it('Should transcribe a media file and produce transcript file in th `vtt` format by default', async function () {
const transcript = await transcriber.transcribe(
buildAbsoluteFixturePath('video_short.mp4'),
shortVideoPath,
{ name: 'tiny' },
'fr',
'vtt'
)

expect(transcript).to.deep.equals({
path: expectedVttTranscriptPath,
path: join(transcriptDirectory, 'video_short.vtt'),
language: 'fr',
format: 'vtt'
})

// eslint-disable-next-line @typescript-eslint/no-unused-expressions
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true

// Whisper timestamped should produce a transcript with millisecond precision.
expect(await readFile(transcript.path, 'utf8')).to.equal(
`WEBVTT
00:00.000 --> 00:02.000
You
00:02.480 --> 00:02.500
you
`
)
})

it('May produce a transcript file in the `srt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
expect(transcript).to.deep.equals({
path: join(transcriptDirectory, 'video_short.srt'),
language: 'en',
format: 'srt'
})

// eslint-disable-next-line @typescript-eslint/no-unused-expressions
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
expect(await readFile(transcript.path, 'utf8')).to.equal(
`1
00:00:02,480 --> 00:00:02,500
you
`
)
})

it('May produce a transcript file in `txt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')
expect(transcript).to.deep.equals({
path: join(transcriptDirectory, 'video_short.txt'),
language: 'en',
format: 'txt'
})

// eslint-disable-next-line @typescript-eslint/no-unused-expressions
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
expect(await readFile(transcript.path, 'utf8')).to.equal(`You
`)
})

it('Should produce the same transcript text as openai-whisper given the same parameters', async function () {
const transcribeArguments: Parameters<typeof transcriber.transcribe> = [
shortVideoPath,
{ name: 'tiny' },
'en',
'txt'
]
const transcript = await transcriber.transcribe(...transcribeArguments)
const openaiTranscriber = new OpenaiTranscriber(
{
name: 'openai-whisper',
requirements: [],
type: 'binary',
binary: 'whisper',
supportedModelFormats: [ 'PyTorch' ]
},
createLogger(),
join(transcriptDirectory, 'openai-whisper')
)
const openaiTranscript = await openaiTranscriber.transcribe(...transcribeArguments)

// eslint-disable-next-line @typescript-eslint/no-unused-expressions
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
expect(await readFile(transcript.path, 'utf8')).to.equal(await readFile(openaiTranscript.path, 'utf8'))
})

after(async function () {
await rm(transcriptDirectory, { recursive: true, force: true })
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,41 +4,34 @@ import { expect, config } from 'chai'
import { existsSync } from 'node:fs'
import { mkdir, readFile, rm } from 'node:fs/promises'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { OpenaiTranscriber } from '@peertube/peertube-transcription'
import { Ctranslate2Transcriber, OpenaiTranscriber } from '@peertube/peertube-transcription'

config.truncateThreshold = 0

describe('Whisper CTranslate2 transcriber', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const expectedVttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
const transcriber = new Ctranslate2Transcriber(
{
name: 'anyNameShouldBeFineReally',
requirements: [],
type: 'binary',
binary: 'whisper-ctranslate2',
supportedModelFormats: []
},
createLogger(),
transcriptDirectory
)

before(async function () {
await mkdir(transcriptDirectory, { recursive: true })
})

it('Should transcribe a media file', async function () {
const transcriber = new OpenaiTranscriber(
{
name: 'whisper-ctranslate2',
requirements: [],
language: '',
type: 'binary',
license: '',
supportedModelFormats: []
},
createLogger(),
transcriptDirectory
)
const transcript = await transcriber.transcribe(
buildAbsoluteFixturePath('video_short.mp4'),
{ name: 'tiny' },
'fr',
'vtt'
)

it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' })
expect(transcript).to.deep.equals({
path: expectedVttTranscriptPath,
language: 'fr',
path: join(transcriptDirectory, 'video_short.vtt'),
language: 'en',
format: 'vtt'
})

Expand All @@ -54,6 +47,65 @@ You
)
})

it('May produce a transcript file in the `srt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
expect(transcript).to.deep.equals({
path: join(transcriptDirectory, 'video_short.srt'),
language: 'en',
format: 'srt'
})

// eslint-disable-next-line @typescript-eslint/no-unused-expressions
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
expect(await readFile(transcript.path, 'utf8')).to.equal(
`1
00:00:00,000 --> 00:00:02,000
You
`
)
})

it('May produce a transcript file in the `txt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')
expect(transcript).to.deep.equals({
path: join(transcriptDirectory, 'video_short.txt'),
language: 'en',
format: 'txt'
})

// eslint-disable-next-line @typescript-eslint/no-unused-expressions
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
expect(await readFile(transcript.path, 'utf8')).to.equal(`You
`)
})

it('Should produce the same transcript text as openai-whisper given the same parameters', async function () {
const transcribeArguments: Parameters<typeof transcriber.transcribe> = [
shortVideoPath,
{ name: 'tiny' },
'en',
'txt'
]
const transcript = await transcriber.transcribe(...transcribeArguments)
const openaiTranscriber = new OpenaiTranscriber(
{
name: 'openai-whisper',
requirements: [],
type: 'binary',
binary: 'whisper',
supportedModelFormats: [ 'PyTorch' ]
},
createLogger(),
join(transcriptDirectory, 'openai-whisper')
)
const openaiTranscript = await openaiTranscriber.transcribe(...transcribeArguments)

// eslint-disable-next-line @typescript-eslint/no-unused-expressions
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
expect(await readFile(transcript.path, 'utf8')).to.equal(await readFile(openaiTranscript.path, 'utf8'))
})

after(async function () {
await rm(transcriptDirectory, { recursive: true, force: true })
})
Expand Down
6 changes: 3 additions & 3 deletions packages/transcription/src/transcription-engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ import { ModelFormat } from './transcription-model.js'
export interface TranscriptionEngine {
name: string
description?: string
language: string
language?: string
requirements: string[]
type: 'binary' | 'bindings' | 'ws'
binary?: string
license: string
binary: string
license?: string
forgeURL?: string
supportedModelFormats: ModelFormat[]

Expand Down
3 changes: 2 additions & 1 deletion packages/transcription/src/whisper/engines.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export const engines: TranscriptionEngine[] = [
name : 'whisper-cpp',
description : 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
type: 'binary',
binary: 'main',
language : 'cpp',
requirements : [],
forgeURL : 'https://github.com/ggerganov/whisper.cpp',
Expand Down Expand Up @@ -49,7 +50,7 @@ export const engines: TranscriptionEngine[] = [
requirements: [ 'python' ],
language: 'python',
type: 'binary',
binary: 'whisper-ctranslate2',
binary: 'whisper_timestamped',
forgeURL: 'https://github.com/openai/whisper',
license: 'MIT',
supportedModelFormats: [ 'CTranslate2' ]
Expand Down
Loading

0 comments on commit 1d27252

Please sign in to comment.