Skip to content

Commit

Permalink
add support for logging timestamps while transcribing
Browse files Browse the repository at this point in the history
functionality activated with new -T/--timestamp option
  • Loading branch information
grahame committed Oct 22, 2024
1 parent 15adb87 commit 59f8672
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/Hear.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
onDevice:(BOOL)useOnDeviceRecognition
singleLineMode:(BOOL)singleLine
addPunctuation:(BOOL)addPunctuation
addTimestamps:(BOOL)addTimestamps
exitWord:(NSString *)exitWord
timeout:(CGFloat)timeout;

Expand Down
25 changes: 22 additions & 3 deletions src/Hear.m
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ @interface Hear()
@property (nonatomic) BOOL useOnDeviceRecognition;
@property (nonatomic) BOOL singleLineMode;
@property (nonatomic) BOOL addPunctuation;
@property (nonatomic) BOOL addTimestamps;
@property (nonatomic, retain) NSString *exitWord;
@property (nonatomic) CGFloat timeout;

Expand All @@ -61,6 +62,7 @@ - (instancetype)initWithLocale:(NSString *)loc
onDevice:(BOOL)onDevice
singleLineMode:(BOOL)singleLine
addPunctuation:(BOOL)punctuation
addTimestamps:(BOOL)timestamps
exitWord:(NSString *)exitWord
timeout:(CGFloat)timeout {
self = [super init];
Expand All @@ -76,6 +78,7 @@ - (instancetype)initWithLocale:(NSString *)loc
self.singleLineMode = singleLine;
self.useDeviceInput = (input == nil);
self.addPunctuation = punctuation;
self.addTimestamps = timestamps;
self.exitWord = exitWord;
self.timeout = timeout;
}
Expand Down Expand Up @@ -193,13 +196,21 @@ - (void)processFile {
if (result == nil) {
return;
}


if (@available(macOS 13, *)) {
if (self.addTimestamps) {
SFSpeechRecognitionMetadata* meta = result.speechRecognitionMetadata;
NSString *timestamp = [NSString stringWithFormat:@"@ Timestamp: %f\n", meta.speechStartTimestamp];
NSDump(timestamp);
}
}

// Make sure there's a space between the incoming result strings
NSString *s = result.bestTranscription.formattedString;
if ([s hasSuffix:@" "] == FALSE && !result.isFinal) {
s = [NSString stringWithFormat:@"%@ ", s];
}

// Print to stdout without newline and flush
NSDump(s);

Expand Down Expand Up @@ -246,7 +257,15 @@ - (void)startListening {
if (self.timeout > 0) {
[self startTimer:self];
}


if (@available(macOS 13, *)) {
if (self.addTimestamps) {
SFSpeechRecognitionMetadata* meta = result.speechRecognitionMetadata;
NSString *timestamp = [NSString stringWithFormat:@"@ Timestamp: %f\n", meta.speechStartTimestamp];
NSDump(timestamp);
}
}

// Print to stdout
NSString *transcript = result.bestTranscription.formattedString;
if (self.singleLineMode) {
Expand Down
13 changes: 12 additions & 1 deletion src/main.m
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
static inline void PrintHelp(void);

// Command line options
static const char optstring[] = "sl:i:dpmx:t:hv";
static const char optstring[] = "sl:i:dpmx:t:Thv";

static struct option long_options[] = {
// List supported locales for speech to text
Expand All @@ -56,6 +56,8 @@
{"device", no_argument, 0, 'd'},
// Whether to add punctuation to speech recognition results
{"punctuation", no_argument, 0, 'p'},
// Whether to log timestamps alongside transcription results (file or microphone input)
{"timestamp", no_argument, 0, 'T'},
// Enable single-line output mode (for mic)
{"mode", no_argument, 0, 'm'},
// Exit word
Expand Down Expand Up @@ -84,6 +86,7 @@ int main(int argc, const char * argv[]) { @autoreleasepool {
BOOL useOnDeviceRecognition = NO;
BOOL singleLineMode = NO;
BOOL addsPunctuation = NO;
BOOL addsTimestamps = NO;
CGFloat timeout = 0.0f;

// Parse arguments
Expand Down Expand Up @@ -124,6 +127,12 @@ int main(int argc, const char * argv[]) { @autoreleasepool {
case 'p':
addsPunctuation = YES;
break;

// Whether to add timestamps to speech recognition results
// This option is ignored on macOS versions prior to Ventura
case 'T':
addsTimestamps = YES;
break;

// Set exit word (causes app to exit when word detected in speech)
case 'x':
Expand Down Expand Up @@ -155,6 +164,7 @@ int main(int argc, const char * argv[]) { @autoreleasepool {
onDevice:useOnDeviceRecognition
singleLineMode:singleLineMode
addPunctuation:addsPunctuation
addTimestamps:addsTimestamps
exitWord:exitWord
timeout:timeout];
[[NSApplication sharedApplication] setDelegate:hear];
Expand Down Expand Up @@ -194,6 +204,7 @@ static inline void PrintHelp(void) {
-p --punctuation Add punctuation to speech recognition results (macOS 13+)\n\
-x --exit-word Set exit word that causes program to quit\n\
-t --timeout Set silence timeout (in seconds)\n\
-T --timestamp Write timestamps as transcription occurs\n\
\n\
-h --help Prints help\n\
-v --version Prints program name and version\n\
Expand Down

0 comments on commit 59f8672

Please sign in to comment.