Skip to content

Commit

Permalink
chore: add ctranslate2 and timestamped
Browse files Browse the repository at this point in the history
  • Loading branch information
lutangar committed Apr 19, 2024
1 parent 9803e7b commit 6d50185
Show file tree
Hide file tree
Showing 13 changed files with 207 additions and 120 deletions.
59 changes: 59 additions & 0 deletions packages/tests/src/transcription/transcribers.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import { createLogger } from 'winston'
import { join } from 'path'
import { expect } from 'chai'
import { existsSync } from 'node:fs'
import { rm, mkdir, readFile } from 'node:fs/promises'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { transcriberFactory } from '@peertube/peertube-transcription'

// Integration suite: every supported engine must be creatable through the factory and must
// produce a VTT transcript for the `video_short.mp4` fixture.
describe('Transcribers', function () {
  // Scratch directory created before the suite and removed once it is done.
  const transcriptDirectory = join(root(), 'test-transcript')
  const vttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
  const transcribers = [
    'openai-whisper',
    'whisper-ctranslate2',
    'whisper-timestamped'
  ]

  before(async function () {
    await mkdir(transcriptDirectory, { recursive: true })
  })

  transcribers.forEach(function (transcriberName) {
    describe(`${transcriberName}`, function () {
      it(`Should instanciate`, function () {
        transcriberFactory.createFromEngineName(transcriberName)
      })

      it('Should run transcription on a media file without raising any errors', async function () {
        const transcriber = transcriberFactory.createFromEngineName(
          transcriberName,
          createLogger(),
          transcriptDirectory
        )
        const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
        const transcript = await transcriber.transcribe(
          mediaFilePath,
          { name: 'tiny' },
          'fr',
          'vtt'
        )

        // Also covers `transcript.path`, so no separate path assertion is needed.
        expect(transcript).to.deep.equals({
          path: vttTranscriptPath,
          language: 'fr',
          format: 'vtt'
        })

        // `.to.equal(true)` is a call expression; the property form `.to.be.true` is rejected
        // by the linter ("Expected an assignment or function call and instead saw an expression").
        expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.equal(true)

        // Assert on the transcript content instead of dumping it to the console.
        const transcriptContent = await readFile(transcript.path, 'utf8')
        expect(transcriptContent.length, `Transcript file ${transcript.path} is empty.`).to.be.above(0)

        await rm(transcript.path)
      })
    })
  })

  after(async function () {
    // `force: true` keeps the cleanup from failing if the directory is already gone.
    await rm(transcriptDirectory, { recursive: true, force: true })
  })
})

This file was deleted.

This file was deleted.

34 changes: 0 additions & 34 deletions packages/tests/src/transcription/whisper/transcribers.spec.ts

This file was deleted.

13 changes: 13 additions & 0 deletions packages/transcription/src/file-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { basename, extname } from 'path'

/**
 * Splits the last segment of a path into its naming parts.
 *
 * @param path - file path to inspect
 * @returns `extension` as reported by `extname` (leading dot included, empty string when none),
 * `baseName` which is the last path segment without that extension, and `name` which is
 * `baseName` followed by `extension`
 */
export const getFileInfo = (path: string) => {
  const extension = extname(path)
  const baseName = basename(path, extension)

  return { extension, baseName, name: baseName + extension }
}
25 changes: 19 additions & 6 deletions packages/transcription/src/transcriber-factory.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { Logger, createLogger } from 'winston'
import { TranscriptionEngine } from './transcription-engine.js'
import { TransformersTranscriber, OpenaiTranscriber } from './whisper/index.js'
import {
Ctranslate2Transcriber,
OpenaiTranscriber, WhisperTimestampedTranscriber
} from './whisper/index.js'
import { AbstractTranscriber } from './abstract-transcriber.js'

export class TranscriberFactory {
Expand All @@ -10,19 +13,29 @@ export class TranscriberFactory {
this.engines = engines
}

createFromEngineName (engineName: string, logger: Logger = createLogger(), transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY) {
createFromEngineName (
engineName: string,
logger: Logger = createLogger(),
transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY
) {
const engine = this.engines.find(({ name }) => name === engineName)
if (!engine) {
throw new Error(`Unknow engine ${engineName}`)
}

const transcriberArgs: ConstructorParameters<typeof AbstractTranscriber> = [ engine, logger, transcriptDirectory ]
const transcriberArgs: ConstructorParameters<typeof AbstractTranscriber> = [
engine,
logger,
transcriptDirectory
]

switch (engineName) {
case 'whisper':
case 'openai-whisper':
return new OpenaiTranscriber(...transcriberArgs)
case 'transformers':
return new TransformersTranscriber(...transcriberArgs)
case 'whisper-ctranslate2':
return new Ctranslate2Transcriber(...transcriberArgs)
case 'whisper-timestamped':
return new WhisperTimestampedTranscriber(...transcriberArgs)
default:
throw new Error(`Unimplemented engine ${engineName}`)
}
Expand Down
31 changes: 21 additions & 10 deletions packages/transcription/src/whisper/engines.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ export const engines: TranscriptionEngine[] = [
license : 'MIT',
supportedModelFormats: [ 'ONNX' ]
},
{
name : 'transformers',
description : 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
type: 'binary',
language : 'python',
requirements : [],
forgeURL : '',
license : '',
supportedModelFormats: [ 'ONNX' ]
},
// {
// name : 'transformers',
// description : 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
// type: 'binary',
// language : 'python',
// requirements : [],
// forgeURL : '',
// license : '',
// supportedModelFormats: [ 'ONNX' ]
// },
{
name: 'openai-whisper',
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
Expand All @@ -42,5 +42,16 @@ export const engines: TranscriptionEngine[] = [
forgeURL: 'https://github.com/openai/whisper',
license: 'MIT',
supportedModelFormats: [ 'CTranslate2' ]
},
{
name: 'whisper-timestamped',
description: '',
requirements: [ 'python' ],
language: 'python',
type: 'binary',
binary: 'whisper-ctranslate2',
forgeURL: 'https://github.com/openai/whisper',
license: 'MIT',
supportedModelFormats: [ 'CTranslate2' ]
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,19 @@ import { $ } from 'execa'
import { TranscriptionModel } from '../../transcription-model.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
import { getFileInfo } from '../../file-utils.js'

export class FasterWhisperTranscriber extends AbstractTranscriber {
export class Ctranslate2Transcriber extends AbstractTranscriber {
async transcribe (
mediaFilePath: string,
model: TranscriptionModel,
language: string,
format: TranscriptFormat = 'vtt'
): Promise<Transcript> {
const $$ = $({ verbose: true })
const { baseName } = getFileInfo(mediaFilePath)

await $$`whisper ${[
await $$`whisper-ctranslate2 ${[
mediaFilePath,
'--model',
model.name,
Expand All @@ -27,7 +29,7 @@ export class FasterWhisperTranscriber extends AbstractTranscriber {

return {
language,
path: join(this.transcriptDirectory, `test.${format}`),
path: join(this.transcriptDirectory, `${baseName}.${format}`),
format
}
}
Expand Down
2 changes: 2 additions & 0 deletions packages/transcription/src/whisper/transcriber/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
export * from './ctranslate2-transcriber.js'
export * from './transformers-js-transcriber.js'
export * from './transformers-transcriber.js'
export * from './openai-transcriber.js'
export * from './timestamped-transcriber.js'
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { $ } from 'execa'
import { TranscriptionModel } from '../../transcription-model.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
import { getFileInfo } from '../../file-utils.js'

export class OpenaiTranscriber extends AbstractTranscriber {
async transcribe (
Expand All @@ -11,9 +12,12 @@ export class OpenaiTranscriber extends AbstractTranscriber {
language: string,
format: TranscriptFormat = 'vtt'
): Promise<Transcript> {
// Shall we run the command with `{ shell: true }` to get the same error as in sh ?
// ex: ENOENT => Command not found
const $$ = $({ verbose: true })
const { baseName } = getFileInfo(mediaFilePath)

await $$`whisper ${[
const { stdout } = await $$`whisper ${[
mediaFilePath,
'--model',
model.name,
Expand All @@ -22,12 +26,14 @@ export class OpenaiTranscriber extends AbstractTranscriber {
'--output_dir',
this.transcriptDirectory
]}`
console.log(stdout)

await $$`ls ${this.transcriptDirectory}`
const { stdout: lsStdout } = await $$`ls ${this.transcriptDirectory}`
console.log(lsStdout)

return {
language,
path: join(this.transcriptDirectory, `test.${format}`),
path: join(this.transcriptDirectory, `${baseName}.${format}`),
format
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import assert from 'node:assert'
import { join } from 'node:path'
import { existsSync } from 'node:fs'
import { rename } from 'node:fs/promises'
import { $ } from 'execa'
import { TranscriptionModel } from '../../transcription-model.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
import { getFileInfo } from '../../file-utils.js'

/**
 * Transcriber that shells out to the `whisper_timestamped` command line tool.
 */
export class WhisperTimestampedTranscriber extends AbstractTranscriber {
  /**
   * Runs `whisper_timestamped` on a media file and returns the transcript of the requested format.
   *
   * The tool is asked for every output format (`--output_format all`), written to
   * `this.transcriptDirectory`. The file expected for `format` is then renamed from
   * `<media file name>.<format>` to `<media base name>.<format>`.
   *
   * @param mediaFilePath - path of the media file handed to the command
   * @param model - only `model.name` is read; it is forwarded as `--model`
   * @param language - copied as is into the returned transcript
   * @param format - extension of the transcript to pick up, `vtt` by default
   * @returns the language, path and format of the renamed transcript
   * @throws AssertionError when the expected transcript file is missing once the command has run
   */
  async transcribe (
    mediaFilePath: string,
    model: TranscriptionModel,
    language: string,
    format: TranscriptFormat = 'vtt'
  ): Promise<Transcript> {
    const $$ = $({ verbose: true })
    const { baseName, name } = getFileInfo(mediaFilePath)

    // NOTE(review): `language` is not forwarded to the command, only echoed back in the
    // result — confirm whether it should be passed to the CLI.
    await $$`whisper_timestamped ${[
      mediaFilePath,
      '--model',
      model.name,
      '--output_format',
      'all',
      '--output_dir',
      this.transcriptDirectory
    ]}`

    // The output is looked up under the full media file name (extension included), e.g.
    // `video.mp4.vtt`; it is renamed so the result is keyed on the base name only.
    const internalTranscriptPath = join(this.transcriptDirectory, `${name}.${format}`)
    const transcriptPath = join(this.transcriptDirectory, `${baseName}.${format}`)
    assert(
      existsSync(internalTranscriptPath),
      `Expected transcript file ${internalTranscriptPath} doesn't exist.`
    )

    await rename(internalTranscriptPath, transcriptPath)

    return {
      language,
      path: transcriptPath,
      format
    }
  }
}
Loading

0 comments on commit 6d50185

Please sign in to comment.