Skip to content

Commit

Permalink
chore(transcription): change transcribers' transcribe method signature
Browse files Browse the repository at this point in the history
Introduce whisper builtin model.
  • Loading branch information
lutangar committed May 6, 2024
1 parent 6ccbe78 commit 67a921f
Show file tree
Hide file tree
Showing 18 changed files with 187 additions and 184 deletions.
13 changes: 10 additions & 3 deletions packages/tests/src/transcription/benchmark.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import {
transcriberFactory,
TranscriptFile,
TranscriptFileEvaluator,
TranscriptionEngine
TranscriptionEngine,
TranscriptionModel
} from '@peertube/peertube-transcription'

interface TestResult {
Expand Down Expand Up @@ -101,9 +102,15 @@ describe('Transcribers benchmark', function () {
models.forEach((modelName) => {
it(`Run ${transcriberName} transcriber benchmark with ${modelName} model`, async function () {
this.timeout(15 * 1000 * 60) // 15 minutes
const model = { name: modelName }
const model = new TranscriptionModel(modelName)
const uuid = short.generate()
const transcriptFile = await transcriber.transcribe(mediaFilePath, model, 'fr', 'txt', uuid)
const transcriptFile = await transcriber.transcribe({
mediaFilePath,
model,
language: 'fr',
format: 'txt',
runId: uuid
})
const evaluator = new TranscriptFileEvaluator(referenceTranscriptFile, transcriptFile)
await new Promise(resolve => setTimeout(resolve, 1))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { expect } from 'chai'

describe('Transcript File Evaluator', function () {
const transcriptDirectory = buildAbsoluteFixturePath('transcription/transcript-evaluator')
const referenceTranscriptFilepath = buildAbsoluteFixturePath('transcription/transcript/reference.txt')
const referenceTranscriptFilePath = buildAbsoluteFixturePath('transcription/transcript/reference.txt')

before(async function () {
await mkdir(transcriptDirectory, { recursive: true })
Expand All @@ -29,7 +29,7 @@ describe('Transcript File Evaluator', function () {

it(`evaluation must return coherent wer & cer`, async function () {
const reference = new TranscriptFile({
path: referenceTranscriptFilepath,
path: referenceTranscriptFilePath,
language: 'fr',
format: 'txt'
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@ import { createLogger } from 'winston'
import { join } from 'path'
import { mkdir, rm } from 'node:fs/promises'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { OpenaiTranscriber, TranscriptFile } from '@peertube/peertube-transcription'
import { OpenaiTranscriber, TranscriptFile, TranscriptionModel, WhisperBuiltinModel } from '@peertube/peertube-transcription'

config.truncateThreshold = 0

describe('Open AI Whisper transcriber', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
const frVideoPath = buildAbsoluteFixturePath('transcription/videos/communiquer-lors-dune-classe-transplantee.mp4')

const transcriber = new OpenaiTranscriber(
{
name: 'openai-whisper',
Expand All @@ -30,7 +29,7 @@ describe('Open AI Whisper transcriber', function () {
})

it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
const transcript = await transcriber.transcribe(shortVideoPath)
const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en' })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'video_short.vtt'),
language: 'en',
Expand All @@ -48,7 +47,7 @@ You
})

it('May produce a transcript file in the `srt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt' })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'video_short.srt'),
language: 'en',
Expand All @@ -65,7 +64,7 @@ You
})

it('May produce a transcript file in the `txt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')
const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt' })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'video_short.txt'),
language: 'en',
Expand All @@ -77,12 +76,13 @@ You
})

it('May transcribe a media file using a local PyTorch model', async function () {
await transcriber.transcribe(frVideoPath, { name: 'myLocalModel', path: buildAbsoluteFixturePath('transcription/models/tiny.pt') }, 'fr')
this.timeout(2 * 1000 * 60)
await transcriber.transcribe({ mediaFilePath: frVideoPath, model: TranscriptionModel.fromPath(buildAbsoluteFixturePath('transcription/models/tiny.pt')), language: 'en' })
})

it('May transcribe a media file in french', async function () {
this.timeout(45000)
const transcript = await transcriber.transcribe(frVideoPath, { name: 'tiny' }, 'fr', 'txt')
this.timeout(2 * 1000 * 60)
const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath, language: 'fr', format: 'txt' })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'),
language: 'fr',
Expand All @@ -105,8 +105,8 @@ Ensuite, il pourront lire et commenter ce de leurs camarades ou répondre aux co
})

it('May transcribe a media file in french with small model', async function () {
this.timeout(400000)
const transcript = await transcriber.transcribe(frVideoPath, { name: 'small' }, 'fr', 'txt')
this.timeout(5 * 1000 * 60)
const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath, language: 'fr', format: 'txt', model: new WhisperBuiltinModel('small') })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'),
language: 'fr',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@ import { createLogger } from 'winston'
import { join } from 'path'
import { mkdir, rm } from 'node:fs/promises'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { OpenaiTranscriber, WhisperTimestampedTranscriber, TranscriptFile, TranscriptFileEvaluator } from '@peertube/peertube-transcription'
import {
OpenaiTranscriber,
WhisperTimestampedTranscriber,
TranscriptFile,
TranscriptFileEvaluator,
TranscriptionModel,
WhisperTranscribeArgs,
WhisperBuiltinModel
} from '@peertube/peertube-transcription'

config.truncateThreshold = 0

Expand All @@ -29,15 +37,10 @@ describe('Linto timestamped Whisper transcriber', function () {
})

it('Should transcribe a media file and produce a transcript file in `vtt` with a ms precision', async function () {
const transcript = await transcriber.transcribe(
shortVideoPath,
{ name: 'tiny' },
'fr'
)

const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en' })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'video_short.vtt'),
language: 'fr',
language: 'en',
format: 'vtt'
}))).to.be.true

Expand All @@ -52,7 +55,7 @@ you
})

it('May produce a transcript file in the `srt` format with a ms precision', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt' })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'video_short.srt'),
language: 'en',
Expand All @@ -69,7 +72,7 @@ you
})

it('May produce a transcript file in `txt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')
const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt' })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'video_short.txt'),
language: 'en',
Expand All @@ -81,12 +84,17 @@ you
})

it('May transcribe a media file using a local PyTorch model file', async function () {
await transcriber.transcribe(frVideoPath, { name: 'myLocalModel', path: buildAbsoluteFixturePath('transcription/models/tiny.pt') }, 'fr')
await transcriber.transcribe({ mediaFilePath: frVideoPath, model: TranscriptionModel.fromPath(buildAbsoluteFixturePath('transcription/models/tiny.pt')), language: 'en' })
})

it('May transcribe a media file in french', async function () {
this.timeout(45000)
const transcript = await transcriber.transcribe(frVideoPath, { name: 'tiny' }, 'fr', 'txt')
this.timeout(2 * 1000 * 60)
const transcript = await transcriber.transcribe({
mediaFilePath: frVideoPath,
language: 'fr',
format: 'txt',
model: new WhisperBuiltinModel('tiny')
})
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'),
language: 'fr',
Expand Down Expand Up @@ -118,13 +126,13 @@ Ensuite, il pourront lire et commenter ce de leur camarade, ou répondre au comm

it('Should produce a text transcript similar to openai-whisper implementation', async function () {
this.timeout(5 * 1000 * 60)
const transcribeArguments: Parameters<typeof transcriber.transcribe> = [
frVideoPath,
{ name: 'tiny' },
'fr',
'txt'
]
const transcript = await transcriber.transcribe(...transcribeArguments)
const transcribeArgs: WhisperTranscribeArgs = {
mediaFilePath: frVideoPath,
model: TranscriptionModel.fromPath(buildAbsoluteFixturePath('transcription/models/tiny.pt')),
language: 'fr',
format: 'txt'
}
const transcript = await transcriber.transcribe(transcribeArgs)

const openaiTranscriber = new OpenaiTranscriber(
{
Expand All @@ -137,7 +145,7 @@ Ensuite, il pourront lire et commenter ce de leur camarade, ou répondre au comm
createLogger(),
join(transcriptDirectory, 'openai-whisper')
)
const openaiTranscript = await openaiTranscriber.transcribe(...transcribeArguments)
const openaiTranscript = await openaiTranscriber.transcribe(transcribeArgs)

const transcriptFileEvaluator = new TranscriptFileEvaluator(openaiTranscript, transcript)
expect(await transcriptFileEvaluator.wer()).to.be.below(25 / 100)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@ import { createLogger } from 'winston'
import { join } from 'path'
import { mkdir, readFile, rm } from 'node:fs/promises'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { Ctranslate2Transcriber, OpenaiTranscriber, TranscriptFile, TranscriptFileEvaluator } from '@peertube/peertube-transcription'
import {
Ctranslate2Transcriber,
OpenaiTranscriber,
TranscriptFile,
TranscriptFileEvaluator,
TranscriptionModel, WhisperTranscribeArgs
} from '@peertube/peertube-transcription'

config.truncateThreshold = 0

Expand All @@ -29,7 +35,7 @@ describe('Whisper CTranslate2 transcriber', function () {
})

it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' })
const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en' })
expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.vtt') }))).to.be.true
expect(await readFile(transcript.path, 'utf8')).to.equal(
`WEBVTT
Expand All @@ -42,7 +48,7 @@ You
})

it('May produce a transcript file in the `srt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt' })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'video_short.srt'),
format: 'srt'
Expand All @@ -58,7 +64,7 @@ You
})

it('May produce a transcript file in the `txt` format', async function () {
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')
const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt' })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'video_short.txt'),
format: 'txt'
Expand All @@ -69,12 +75,13 @@ You
})

it('May transcribe a media file using a local CTranslate2 model', async function () {
const transcript = await transcriber.transcribe(
shortVideoPath,
{ name: 'myLocalModel', path: buildAbsoluteFixturePath('transcription/models/faster-whisper-tiny') },
'en',
'txt'
)
this.timeout(2 * 1000 * 60)
const transcript = await transcriber.transcribe({
mediaFilePath: shortVideoPath,
model: TranscriptionModel.fromPath(buildAbsoluteFixturePath('transcription/models/faster-whisper-tiny')),
language: 'en',
format: 'txt'
})
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'video_short.txt'),
format: 'txt'
Expand All @@ -85,8 +92,8 @@ You
})

it('May transcribe a media file in french', async function () {
this.timeout(45000)
const transcript = await transcriber.transcribe(frVideoPath, { name: 'tiny' }, 'fr', 'txt')
this.timeout(5 * 1000 * 60)
const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath, language: 'fr', format: 'txt' })
expect(await transcript.equals(new TranscriptFile({
path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'),
language: 'fr',
Expand All @@ -109,13 +116,13 @@ Ensuite, il pourront lire et commenter ce de leur camarade, on répondra au comm

it('Should produce a text transcript similar to openai-whisper implementation', async function () {
this.timeout(5 * 1000 * 60)
const transcribeArguments: Parameters<typeof transcriber.transcribe> = [
frVideoPath,
{ name: 'tiny' },
'fr',
'txt'
]
const transcript = await transcriber.transcribe(...transcribeArguments)
const transcribeArgs: WhisperTranscribeArgs = {
mediaFilePath: frVideoPath,
model: TranscriptionModel.fromPath(buildAbsoluteFixturePath('transcription/models/tiny.pt')),
language: 'fr',
format: 'txt'
}
const transcript = await transcriber.transcribe(transcribeArgs)
const openaiTranscriber = new OpenaiTranscriber(
{
name: 'openai-whisper',
Expand All @@ -127,7 +134,7 @@ Ensuite, il pourront lire et commenter ce de leur camarade, on répondra au comm
createLogger(),
join(transcriptDirectory, 'openai-whisper')
)
const openaiTranscript = await openaiTranscriber.transcribe(...transcribeArguments)
const openaiTranscript = await openaiTranscriber.transcribe(transcribeArgs)

const transcriptFileEvaluator = new TranscriptFileEvaluator(openaiTranscript, transcript)
expect(await transcriptFileEvaluator.wer()).to.be.below(20 / 100)
Expand Down
22 changes: 12 additions & 10 deletions packages/transcription/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,26 @@ import { OpenaiTranscriber } from '@peertube/peertube-transcription'
binary: 'whisper'
});

const transcriptFile = await transcriber.transcribe(
'./myVideo.mp4',
{ name: 'tiny' },
'en', 'txt'
);
const transcriptFile = await transcriber.transcribe({
mediaFilePath: './myVideo.mp4',
format: 'txt'
});

console.log(transcriptFile.path);
console.log(await transcriptFile.read());
})();
```

Using a local model file:

```typescript
const transcriptFile = await transcriber.transcribe(
'./myVideo.mp4',
{ name: 'my fine tuned large model', path: './models/large.pt' },
'en', 'txt'
);
import { TranscriptionModel } from '@peertube/peertube-transcription'

const transcriptFile = await transcriber.transcribe({
  mediaFilePath: './myVideo.mp4',
  model: TranscriptionModel.fromPath('./models/large.pt'),
  format: 'txt'
});
```

You may use the builtin Factory if you're happy with the default configuration:
Expand Down
22 changes: 15 additions & 7 deletions packages/transcription/src/abstract-transcriber.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ import { TranscriptionModel } from './transcription-model.js'
import { TranscriptionRun } from './transcription-run.js'
import { TranscriptFile, TranscriptFormat } from './transcript/index.js'

/**
 * Arguments accepted by every transcriber's `transcribe` method.
 *
 * `model` is optional: call sites throughout this commit invoke
 * `transcribe({ mediaFilePath, language, format })` without a model,
 * so implementations are expected to fall back to a default built-in
 * model when it is omitted.
 */
export interface TranscribeArgs {
  // Path to the audio/video file to transcribe
  mediaFilePath: string
  // Optional — implementations default to a built-in model when omitted
  model?: TranscriptionModel
  // ISO 639 language code, e.g. 'en', 'fr'
  language?: string
  // Output format of the transcript file; implementations default to 'vtt'
  format?: TranscriptFormat
  // Identifier correlating this run's logs/artifacts; generated when omitted
  runId?: SUUID
}

export abstract class AbstractTranscriber {
public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')

Expand Down Expand Up @@ -55,11 +63,11 @@ export abstract class AbstractTranscriber {
return model.format === 'PyTorch'
}

abstract transcribe (
mediaFilePath: string,
model: TranscriptionModel,
language: string,
format: TranscriptFormat,
runId: SUUID
): Promise<TranscriptFile>
/**
 * Transcribe a media file into a transcript file.
 *
 * NOTE: an abstract member has no body, so parameter initializers
 * (`format = 'vtt'`, `runId = short.generate()`) are not permitted here
 * and would not be inherited by implementations anyway — each concrete
 * transcriber must apply these defaults itself:
 *   - `format` defaults to 'vtt'
 *   - `runId` defaults to a freshly generated short UUID
 *
 * @param args media file path, optional model/language/format/runId
 * @returns the transcript file produced by the underlying engine
 */
abstract transcribe (args: TranscribeArgs): Promise<TranscriptFile>
}
Loading

0 comments on commit 67a921f

Please sign in to comment.