Skip to content

Commit

Permalink
Add support for transcription handling (#1119)
Browse files Browse the repository at this point in the history
* Add support for transcription handling

* Create honest-vans-tan.md

* emit event for each segment

* Emit segment array and also emit on publication

* export TranscriptionSegment type

* Add track sync time updates

* make import more explicit

* emit events also for local participant

* mark transcription events as beta

* update protocol
  • Loading branch information
lukasIO authored May 3, 2024
1 parent 780f131 commit a087e03
Show file tree
Hide file tree
Showing 13 changed files with 127 additions and 22 deletions.
5 changes: 5 additions & 0 deletions .changeset/honest-vans-tan.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"livekit-client": patch
---

Add support for transcription handling
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
"size-limit": "size-limit"
},
"dependencies": {
"@livekit/protocol": "1.14.0",
"@livekit/protocol": "1.15.0",
"events": "^3.3.0",
"loglevel": "^1.8.0",
"sdp-transform": "^2.14.1",
Expand Down
25 changes: 10 additions & 15 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export { facingModeFromDeviceLabel, facingModeFromLocalTrack } from './room/trac
export * from './room/track/options';
export * from './room/track/processor/types';
export * from './room/track/types';
export type { DataPublishOptions, SimulationScenario } from './room/types';
export type { DataPublishOptions, SimulationScenario, TranscriptionSegment } from './room/types';
export * from './version';
export {
ConnectionQuality,
Expand Down
4 changes: 4 additions & 0 deletions src/room/RTCEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
TrackInfo,
type TrackPublishedResponse,
TrackUnpublishedResponse,
Transcription,
UpdateSubscription,
UserPacket,
} from '@livekit/protocol';
Expand Down Expand Up @@ -634,6 +635,8 @@ export default class RTCEngine extends (EventEmitter as new () => TypedEventEmit
this.emit(EngineEvent.ActiveSpeakersUpdate, dp.value.value.speakers);
} else if (dp.value?.case === 'user') {
this.emit(EngineEvent.DataPacketReceived, dp.value.value, dp.kind);
} else if (dp.value?.case === 'transcription') {
this.emit(EngineEvent.TranscriptionReceived, dp.value.value);
}
} finally {
unlock();
Expand Down Expand Up @@ -1357,6 +1360,7 @@ export type EngineEventCallbacks = {
) => void;
activeSpeakersUpdate: (speakers: Array<SpeakerInfo>) => void;
dataPacketReceived: (userPacket: UserPacket, kind: DataPacket_Kind) => void;
transcriptionReceived: (transcription: Transcription) => void;
transportsCreated: (publisher: PCTransport, subscriber: PCTransport) => void;
/** @internal */
trackSenderAdded: (track: Track, sender: RTCRtpSender) => void;
Expand Down
28 changes: 27 additions & 1 deletion src/room/Room.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import {
TrackInfo,
TrackSource,
TrackType,
Transcription as TranscriptionModel,
TranscriptionSegment as TranscriptionSegmentModel,
UserPacket,
protoInt64,
} from '@livekit/protocol';
Expand Down Expand Up @@ -61,11 +63,12 @@ import type { TrackPublication } from './track/TrackPublication';
import type { TrackProcessor } from './track/processor/types';
import type { AdaptiveStreamSettings } from './track/types';
import { getNewAudioContext, sourceToKind } from './track/utils';
import type { SimulationOptions, SimulationScenario } from './types';
import type { SimulationOptions, SimulationScenario, TranscriptionSegment } from './types';
import {
Future,
Mutex,
createDummyVideoStreamTrack,
extractTranscriptionSegments,
getEmptyAudioStreamTrack,
isBrowserSupported,
isCloud,
Expand Down Expand Up @@ -330,6 +333,7 @@ class Room extends (EventEmitter as new () => TypedEmitter<RoomEventCallbacks>)
})
.on(EngineEvent.ActiveSpeakersUpdate, this.handleActiveSpeakersUpdate)
.on(EngineEvent.DataPacketReceived, this.handleDataPacket)
.on(EngineEvent.TranscriptionReceived, this.handleTranscription)
.on(EngineEvent.Resuming, () => {
this.clearConnectionReconcile();
this.isResuming = true;
Expand Down Expand Up @@ -1471,6 +1475,23 @@ class Room extends (EventEmitter as new () => TypedEmitter<RoomEventCallbacks>)
participant?.emit(ParticipantEvent.DataReceived, userPacket.payload, kind);
};

/**
 * Protocol-level transcription segments keyed by a string.
 * NOTE(review): nothing in the visible code reads or writes this map; presumably it is
 * keyed by segment id — confirm the intended use.
 */
bufferedSegments: Map<string, TranscriptionSegmentModel> = new Map();

/**
 * Fans an incoming transcription out to the publication, the participant and the room.
 *
 * The participant is resolved by identity (either the local participant or one of the
 * remote participants) and the publication is resolved on that participant by track id.
 * Either lookup may come back empty; the room-level event is emitted regardless, passing
 * `undefined` for whatever could not be resolved.
 *
 * @param transcription - transcription message received from the engine
 */
private handleTranscription = (transcription: TranscriptionModel) => {
  const { participantIdentity, trackId } = transcription;

  // the transcription can belong to the local participant as well as to a remote one
  const isLocal = participantIdentity === this.localParticipant.identity;
  const participant = isLocal
    ? this.localParticipant
    : this.remoteParticipants.get(participantIdentity);
  const publication = participant?.trackPublications.get(trackId);

  const segments = extractTranscriptionSegments(transcription);

  // notify from the most specific emitter (publication) to the least specific (room)
  publication?.emit(TrackEvent.TranscriptionReceived, segments);
  participant?.emit(ParticipantEvent.TranscriptionReceived, segments, publication);
  this.emit(RoomEvent.TranscriptionReceived, segments, participant, publication);
};

private handleAudioPlaybackStarted = () => {
if (this.canPlaybackAudio) {
return;
Expand Down Expand Up @@ -2071,6 +2092,11 @@ export type RoomEventCallbacks = {
kind?: DataPacket_Kind,
topic?: string,
) => void;
transcriptionReceived: (
transcription: TranscriptionSegment[],
participant?: Participant,
publication?: TrackPublication,
) => void;
connectionQualityChanged: (quality: ConnectionQuality, participant: Participant) => void;
mediaDevicesError: (error: Error) => void;
trackStreamStateChanged: (
Expand Down
23 changes: 23 additions & 0 deletions src/room/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@ export enum RoomEvent {
*/
DataReceived = 'dataReceived',

/**
* Transcription received from a participant's track.
* @beta
*/
TranscriptionReceived = 'transcriptionReceived',

/**
* Connection quality was changed for a Participant. It'll receive updates
* from the local participant, as well as any [[RemoteParticipant]]s that we are
Expand Down Expand Up @@ -402,6 +408,12 @@ export enum ParticipantEvent {
*/
DataReceived = 'dataReceived',

/**
* Transcription received with this participant as the data source.
* @beta
*/
TranscriptionReceived = 'transcriptionReceived',

/**
* Has speaking status changed for the current participant
*
Expand Down Expand Up @@ -479,6 +491,7 @@ export enum EngineEvent {
MediaTrackAdded = 'mediaTrackAdded',
ActiveSpeakersUpdate = 'activeSpeakersUpdate',
DataPacketReceived = 'dataPacketReceived',
TranscriptionReceived = 'transcriptionReceived',
RTPVideoMapUpdate = 'rtpVideoMapUpdate',
DCBufferStatusChanged = 'dcBufferStatusChanged',
ParticipantUpdate = 'participantUpdate',
Expand Down Expand Up @@ -562,4 +575,14 @@ export enum TrackEvent {
* @internal
*/
AudioTrackFeatureUpdate = 'audioTrackFeatureUpdate',

/**
* Transcription segments were received for this publication.
* @beta
*/
TranscriptionReceived = 'transcriptionReceived',

/**
* The RTP timestamp reported by the track's receiver changed.
* @experimental
*/
TimeSyncUpdate = 'timeSyncUpdate',
}
6 changes: 5 additions & 1 deletion src/room/participant/Participant.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import type RemoteTrack from '../track/RemoteTrack';
import type RemoteTrackPublication from '../track/RemoteTrackPublication';
import { Track } from '../track/Track';
import type { TrackPublication } from '../track/TrackPublication';
import type { LoggerOptions } from '../types';
import type { LoggerOptions, TranscriptionSegment } from '../types';

export enum ConnectionQuality {
Excellent = 'excellent',
Expand Down Expand Up @@ -329,6 +329,10 @@ export type ParticipantEventCallbacks = {
participantMetadataChanged: (prevMetadata: string | undefined, participant?: any) => void;
participantNameChanged: (name: string) => void;
dataReceived: (payload: Uint8Array, kind: DataPacket_Kind) => void;
transcriptionReceived: (
transcription: TranscriptionSegment[],
publication?: TrackPublication,
) => void;
isSpeakingChanged: (speaking: boolean) => void;
connectionQualityChanged: (connectionQuality: ConnectionQuality) => void;
trackStreamStateChanged: (
Expand Down
13 changes: 13 additions & 0 deletions src/room/track/RemoteTrack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,20 @@ export default abstract class RemoteTrack<
if (!this.monitorInterval) {
this.monitorInterval = setInterval(() => this.monitorReceiver(), monitorFrequency);
}
this.registerTimeSyncUpdate();
}

protected abstract monitorReceiver(): void;

/**
 * Starts a per-animation-frame loop that reads the RTP timestamp of the receiver's first
 * synchronization source and emits `TrackEvent.TimeSyncUpdate` whenever that value differs
 * from the one stored in `rtpTimestamp`.
 *
 * The handle of the most recently scheduled frame is kept in `timeSyncHandle`. Calling
 * this method again restarts the loop rather than stacking a second one on top of it.
 */
registerTimeSyncUpdate() {
  // Cancel a loop left over from an earlier call. Without this, every call would add
  // another self-rescheduling callback while only the newest handle stays cancellable.
  // Cancelling a handle that has already fired is a no-op.
  if (this.timeSyncHandle !== undefined) {
    cancelAnimationFrame(this.timeSyncHandle);
  }

  const loop = () => {
    // schedule the next tick first so the stored handle always refers to a pending frame
    this.timeSyncHandle = requestAnimationFrame(() => loop());
    const newTime = this.receiver?.getSynchronizationSources()[0]?.rtpTimestamp;
    // compare against undefined explicitly: 0 is a valid RTP timestamp and must not be dropped
    if (newTime !== undefined && this.rtpTimestamp !== newTime) {
      this.emit(TrackEvent.TimeSyncUpdate, newTime);
      this.rtpTimestamp = newTime;
    }
  };
  loop();
}
}
9 changes: 9 additions & 0 deletions src/room/track/Track.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ export abstract class Track<
*/
streamState: Track.StreamState = Track.StreamState.Active;

/** @internal */
rtpTimestamp: number | undefined;

protected _mediaStreamTrack: MediaStreamTrack;

protected _mediaStreamID: string;
Expand All @@ -63,6 +66,8 @@ export abstract class Track<

private loggerContextCb: LoggerOptions['loggerContextCb'];

protected timeSyncHandle: number | undefined;

protected _currentBitrate: number = 0;

protected monitorInterval?: ReturnType<typeof setInterval>;
Expand Down Expand Up @@ -255,6 +260,9 @@ export abstract class Track<
if (this.monitorInterval) {
clearInterval(this.monitorInterval);
}
if (this.timeSyncHandle) {
cancelAnimationFrame(this.timeSyncHandle);
}
}

/** @internal */
Expand Down Expand Up @@ -517,4 +525,5 @@ export type TrackEventCallbacks = {
upstreamResumed: (track: any) => void;
trackProcessorUpdate: (processor?: TrackProcessor<Track.Kind, any>) => void;
audioTrackFeatureUpdate: (track: any, feature: AudioTrackFeature, enabled: boolean) => void;
timeSyncUpdate: (timestamp: number) => void;
};
4 changes: 3 additions & 1 deletion src/room/track/TrackPublication.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { EventEmitter } from 'events';
import type TypedEventEmitter from 'typed-emitter';
import log, { LoggerNames, getLogger } from '../../logger';
import { TrackEvent } from '../events';
import type { LoggerOptions } from '../types';
import type { LoggerOptions, TranscriptionSegment } from '../types';
import LocalAudioTrack from './LocalAudioTrack';
import LocalVideoTrack from './LocalVideoTrack';
import RemoteAudioTrack from './RemoteAudioTrack';
Expand Down Expand Up @@ -174,4 +174,6 @@ export type PublicationEventCallbacks = {
prevStatus: TrackPublication.SubscriptionStatus,
) => void;
subscriptionFailed: (error: SubscriptionError) => void;
transcriptionReceived: (transcription: TranscriptionSegment[]) => void;
timeSyncUpdate: (timestamp: number) => void;
};
9 changes: 9 additions & 0 deletions src/room/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,12 @@ export type LoggerOptions = {
loggerName?: string;
loggerContextCb?: () => Record<string, unknown>;
};

/**
 * Plain-object representation of a single transcription segment.
 */
export interface TranscriptionSegment {
  /** Identifier of the segment. */
  id: string;
  /**
   * Language of the segment.
   * NOTE(review): the expected format (e.g. a BCP 47 tag) is not visible here — confirm.
   */
  language: string;
  /** Transcribed text of the segment. */
  text: string;
  /**
   * Start of the segment.
   * NOTE(review): unit and reference clock are not visible here — confirm against the protocol.
   */
  startTime: number;
  /**
   * End of the segment.
   * NOTE(review): unit and reference clock are not visible here — confirm against the protocol.
   */
  endTime: number;
  /**
   * Whether the segment is final.
   * NOTE(review): presumably a non-final segment may be revised by a later one — confirm.
   */
  final: boolean;
}
19 changes: 17 additions & 2 deletions src/room/utils.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import { ClientInfo, ClientInfo_SDK } from '@livekit/protocol';
import { ClientInfo, ClientInfo_SDK, Transcription as TranscriptionModel } from '@livekit/protocol';
import { getBrowser } from '../utils/browserParser';
import { protocolVersion, version } from '../version';
import CriticalTimers from './timers';
import type LocalAudioTrack from './track/LocalAudioTrack';
import type RemoteAudioTrack from './track/RemoteAudioTrack';
import { VideoCodec, videoCodecs } from './track/options';
import { getNewAudioContext } from './track/utils';
import type { LiveKitReactNativeInfo } from './types';
import type { LiveKitReactNativeInfo, TranscriptionSegment } from './types';

const separator = '|';
export const ddExtensionURI =
Expand Down Expand Up @@ -527,3 +527,18 @@ export function toHttpUrl(url: string): string {
}
return url;
}

/**
 * Converts a protocol transcription message into plain `TranscriptionSegment` objects.
 *
 * `startTime` and `endTime` are turned into `number`s by parsing their string form as
 * base-10 integers, so values above `Number.MAX_SAFE_INTEGER` lose precision.
 *
 * @param transcription - protocol message whose `segments` are converted
 * @returns one plain object per entry of `transcription.segments`, in the same order
 */
export function extractTranscriptionSegments(
  transcription: TranscriptionModel,
): TranscriptionSegment[] {
  // Always pass the radix so the string form is parsed as decimal regardless of its prefix.
  const toNumber = (value: { toString(): string }): number =>
    Number.parseInt(value.toString(), 10);

  return transcription.segments.map((segment) => ({
    id: segment.id,
    text: segment.text,
    startTime: toNumber(segment.startTime),
    endTime: toNumber(segment.endTime),
    final: segment.final,
    language: segment.language,
  }));
}

0 comments on commit a087e03

Please sign in to comment.