From e1d758b5235783c223577ce6dbebef90bf8b66f8 Mon Sep 17 00:00:00 2001 From: Grahame Bowland Date: Tue, 22 Oct 2024 14:40:56 +0800 Subject: [PATCH] add support for logging timestamps while transcribing functionality activated with new -T/--timestamp option --- src/Hear.h | 1 + src/Hear.m | 25 ++++++++++++++++++++++--- src/main.m | 13 ++++++++++++- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/Hear.h b/src/Hear.h index bb278a2..2ee0122 100644 --- a/src/Hear.h +++ b/src/Hear.h @@ -43,6 +43,7 @@ onDevice:(BOOL)useOnDeviceRecognition singleLineMode:(BOOL)singleLine addPunctuation:(BOOL)addPunctuation + addTimestamps:(BOOL)addTimestamps exitWord:(NSString *)exitWord timeout:(CGFloat)timeout; diff --git a/src/Hear.m b/src/Hear.m index 421e8b2..856ffc0 100644 --- a/src/Hear.m +++ b/src/Hear.m @@ -49,6 +49,7 @@ @interface Hear() @property (nonatomic) BOOL useOnDeviceRecognition; @property (nonatomic) BOOL singleLineMode; @property (nonatomic) BOOL addPunctuation; +@property (nonatomic) BOOL addTimestamps; @property (nonatomic, retain) NSString *exitWord; @property (nonatomic) CGFloat timeout; @@ -61,6 +62,7 @@ - (instancetype)initWithLocale:(NSString *)loc onDevice:(BOOL)onDevice singleLineMode:(BOOL)singleLine addPunctuation:(BOOL)punctuation + addTimestamps:(BOOL)timestamps exitWord:(NSString *)exitWord timeout:(CGFloat)timeout { self = [super init]; @@ -76,6 +78,7 @@ - (instancetype)initWithLocale:(NSString *)loc self.singleLineMode = singleLine; self.useDeviceInput = (input == nil); self.addPunctuation = punctuation; + self.addTimestamps = timestamps; self.exitWord = exitWord; self.timeout = timeout; } @@ -193,13 +196,21 @@ - (void)processFile { if (result == nil) { return; } - + + if (@available(macOS 13, *)) { + if (self.addTimestamps) { + SFSpeechRecognitionMetadata* meta = result.speechRecognitionMetadata; + NSString *timestamp = [[NSDateComponentsFormatter new] stringFromTimeInterval:meta.speechStartTimestamp]; + NSDump([NSString 
stringWithFormat:@"\n%@ -> \n", timestamp]); + } + } + // Make sure there's a space between the incoming result strings NSString *s = result.bestTranscription.formattedString; if ([s hasSuffix:@" "] == FALSE && !result.isFinal) { s = [NSString stringWithFormat:@"%@ ", s]; } - + // Print to stdout without newline and flush NSDump(s); @@ -246,7 +257,15 @@ - (void)startListening { if (self.timeout > 0) { [self startTimer:self]; } - + + if (@available(macOS 13, *)) { + if (self.addTimestamps) { + SFSpeechRecognitionMetadata* meta = result.speechRecognitionMetadata; + NSString *timestamp = [[NSDateComponentsFormatter new] stringFromTimeInterval:meta.speechStartTimestamp]; + NSDump([NSString stringWithFormat:@"\n%@ -> \n", timestamp]); + } + } + // Print to stdout NSString *transcript = result.bestTranscription.formattedString; if (self.singleLineMode) { diff --git a/src/main.m b/src/main.m index 41f503c..d8f9798 100644 --- a/src/main.m +++ b/src/main.m @@ -43,7 +43,7 @@ static inline void PrintHelp(void); // Command line options -static const char optstring[] = "sl:i:dpmx:t:hv"; +static const char optstring[] = "sl:i:dpmx:t:Thv"; static struct option long_options[] = { // List supported locales for speech to text @@ -56,6 +56,8 @@ {"device", no_argument, 0, 'd'}, // Whether to add punctuation to speech recognition results {"punctuation", no_argument, 0, 'p'}, + // Whether to add timestamps when reading from a file + {"timestamp", no_argument, 0, 'T'}, // Enable single-line output mode (for mic) {"mode", no_argument, 0, 'm'}, // Exit word @@ -84,6 +86,7 @@ int main(int argc, const char * argv[]) { @autoreleasepool { BOOL useOnDeviceRecognition = NO; BOOL singleLineMode = NO; BOOL addsPunctuation = NO; + BOOL addsTimestamps = NO; CGFloat timeout = 0.0f; // Parse arguments @@ -124,6 +127,12 @@ int main(int argc, const char * argv[]) { @autoreleasepool { case 'p': addsPunctuation = YES; break; + + // Whether to add timestamps to speech recognition results + // This option 
is ignored on macOS versions prior to Ventura + case 'T': + addsTimestamps = YES; + break; // Set exit word (causes app to exit when word detected in speech) case 'x': @@ -155,6 +164,7 @@ int main(int argc, const char * argv[]) { @autoreleasepool { onDevice:useOnDeviceRecognition singleLineMode:singleLineMode addPunctuation:addsPunctuation + addTimestamps:addsTimestamps exitWord:exitWord timeout:timeout]; [[NSApplication sharedApplication] setDelegate:hear]; @@ -194,6 +204,7 @@ static inline void PrintHelp(void) { -p --punctuation Add punctuation to speech recognition results (macOS 13+)\n\ -x --exit-word Set exit word that causes program to quit\n\ -t --timeout Set silence timeout (in seconds)\n\ + -T --timestamp Write timestamps as transcription occurs\n\ \n\ -h --help Prints help\n\ -v --version Prints program name and version\n\