diff --git a/CHANGELOG.md b/CHANGELOG.md index 93be58c..4e4e332 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.14.6] - 2024-04-26 + +## Added + +- Support for removing words tagged as disfluency. + ## [1.14.5] - 2024-03-20 ## Added diff --git a/VERSION b/VERSION index 24a57f2..c6ba3bc 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.14.5 +1.14.6 diff --git a/speechmatics/cli.py b/speechmatics/cli.py index 08c092d..7bab565 100755 --- a/speechmatics/cli.py +++ b/speechmatics/cli.py @@ -235,6 +235,12 @@ def get_transcription_config( "volume_threshold": args.get("volume_threshold") } + if args.get("remove_disfluencies") is not None: + config["transcript_filtering_config"] = {} + config["transcript_filtering_config"]["remove_disfluencies"] = args.get( + "remove_disfluencies" + ) + if args.get("ctrl"): LOGGER.warning(f"Using internal dev control command: {args['ctrl']}") config["ctrl"] = json.loads(args["ctrl"]) diff --git a/speechmatics/cli_parser.py b/speechmatics/cli_parser.py index 3087b88..6d9d928 100644 --- a/speechmatics/cli_parser.py +++ b/speechmatics/cli_parser.py @@ -486,6 +486,12 @@ def get_arg_parser(): required=False, help="Comma-separated list of whitelisted event types for audio events.", ) + rt_transcribe_command_parser.add_argument( + "--remove-disfluencies", + default=False, + action="store_true", + help="Removes words tagged as disfluency.", + ) # Parent parser for batch auto-chapters argument batch_audio_events_parser = argparse.ArgumentParser(add_help=False) diff --git a/speechmatics/models.py b/speechmatics/models.py index 528dc17..fae5751 100644 --- a/speechmatics/models.py +++ b/speechmatics/models.py @@ -128,9 +128,12 @@ def asdict(self) -> Dict[Any, Any]: enable_entities: bool = None """Indicates if inverse text normalization entity output is enabled.""" - audio_filtering_config: dict = None + audio_filtering_config: Optional[dict] = None """Configuration for limiting the transcription of quiet audio.""" + transcript_filtering_config: Optional[dict] = None + """Configuration for applying filtering to the transcription.""" + @dataclass class RTSpeakerDiarizationConfig: diff --git a/tests/test_cli.py b/tests/test_cli.py index d5730cc..ce22044 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -623,6 +623,7 @@ def test_rt_main_with_all_options(mock_server, tmp_path): str(chunk_size), "--auth-token=xyz", audio_path, + "--remove-disfluencies", ] cli.main(vars(cli.parse_args(args))) @@ -660,6 +661,12 @@ def test_rt_main_with_all_options(mock_server, tmp_path): assert msg["transcription_config"]["max_delay_mode"] == "fixed" assert msg["transcription_config"]["speaker_change_sensitivity"] == 0.8 assert msg["transcription_config"].get("operating_point") is None + assert ( + msg["transcription_config"]["transcript_filtering_config"][ + "remove_disfluencies" + ] + is True + ) # Check that the chunk size argument is respected add_audio_messages = mock_server.find_add_audio_messages()