diff --git a/CHANGELOG.md b/CHANGELOG.md index b045821..970a375 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add support for language identification (DEL-10418) + +### Fixed + +- Fixed an issue where `transcription_config` was not correctly loaded from the JSON config file + ## [1.6.5] - 2023-02-22 ### Fixed diff --git a/README.md b/README.md index 807901d..8b7a3b4 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,16 @@ A complete list of commands and flags can be found in the SDK docs at https://sp # $JOB_ID is from the submit command output $ speechmatics batch get-results --job-id $JOB_ID ``` + +- Submit a job with automatic language identification + + ```shell + $ speechmatics batch transcribe --language auto --langid-langs en,es example_audio.wav + ``` + If Speechmatics is not able to identify a language with high enough confidence, the job will be rejected. This is to reduce the risk of transcribing incorrectly. + + `--langid-langs` is optional and specifies what language(s) you expect to be detected in the source files. + - Submit a job with translation (translation output only available as JSON) @@ -176,6 +186,9 @@ A complete list of commands and flags can be found in the SDK docs at https://sp ``` `--translation-langs` is supported in asynchronous mode as well, and translation output can be retrieved using `get-results` with `--output-format json-v2` set. + When combining language identification with translation, we can't know if the identified language can be translated + to your translation targets. If the translation pair is not supported, the error will be recorded in the metadata of the transcript. + ### Custom Transcription Config File - Instead of passing all the transcription options via the command line you can also pass a transcription config file. 
The config file is a JSON file that contains the transcription options. diff --git a/docs/_modules/index.html b/docs/_modules/index.html index 40d5ebc..cf4fd6e 100644 --- a/docs/_modules/index.html +++ b/docs/_modules/index.html @@ -112,7 +112,7 @@

Quick search

| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12 diff --git a/docs/_modules/speechmatics/batch_client.html b/docs/_modules/speechmatics/batch_client.html index 6e9e9ef..a0bfd25 100644 --- a/docs/_modules/speechmatics/batch_client.html +++ b/docs/_modules/speechmatics/batch_client.html @@ -62,7 +62,7 @@

Source code for speechmatics.batch_client

 
 
 
[docs]class BatchClient: - """Client class for Speechmatics Batch ASR REST API. + """Client class for Speechmatics Batch ASR REST API. This client may be used directly but must be closed afterwards, e.g.:: @@ -82,7 +82,7 @@

Source code for speechmatics.batch_client

     """
 
     def __init__(self, connection_settings: ConnectionSettings):
-        """Constructor method.
+        """Constructor method.
 
         :param connection_settings: Connection settings for API
         :type connection_settings: speechmatics.models.ConnectionSettings.
@@ -103,7 +103,7 @@ 

Source code for speechmatics.batch_client

         self.api_client = None
 
 
[docs] def connect(self): - """Create a connection to a Speechmatics Transcription REST endpoint""" + """Create a connection to a Speechmatics Transcription REST endpoint""" self.api_client = httpx.Client( base_url=self.connection_settings.url, timeout=None, @@ -121,7 +121,7 @@

Source code for speechmatics.batch_client

         self.close()
 
 
[docs] def close(self) -> None: - """ + """ Clean up/close client connection pool. This is required when using the client directly, but not required when @@ -132,7 +132,7 @@

Source code for speechmatics.batch_client

         self.api_client.close()
[docs] def send_request(self, method: str, path: str, **kwargs) -> httpx.Response: - """ + """ Send a request using httpx.Client() :param method: HTTP request method @@ -155,7 +155,7 @@

Source code for speechmatics.batch_client

             return response
[docs] def list_jobs(self) -> List[Dict[str, Any]]: - """ + """ Lists last 100 jobs within 7 days associated with auth_token for the SaaS or all of the jobs for the batch appliance. @@ -171,7 +171,7 @@

Source code for speechmatics.batch_client

             Dict[str, Any], BatchTranscriptionConfig, str, os.PathLike
         ],
     ) -> str:
-        """
+        """
         Submits audio and config for transcription.
 
         :param audio: Audio file path or tuple of filename and bytes
@@ -194,7 +194,7 @@ 

Source code for speechmatics.batch_client

             config_json = json.dumps(transcription_config)
         else:
             raise ValueError(
-                """Job configuration must be a BatchTranscriptionConfig object,
+                """Job configuration must be a BatchTranscriptionConfig object,
                 a filepath as a string or Path object, or a dict"""
             )
         config_data = {"config": config_json.encode("utf-8")}
@@ -217,7 +217,7 @@ 

Source code for speechmatics.batch_client

         job_id: str,
         transcription_format: str = "json-v2",
     ) -> Union[bool, str, Dict[str, Any]]:
-        """
+        """
         Request results of a transcription job.
 
         :param job_id: ID of previously submitted job.
@@ -261,7 +261,7 @@ 

Source code for speechmatics.batch_client

         return response.text
[docs] def delete_job(self, job_id: str, force: bool = False) -> str: - """ + """ Delete a job. Must pass force=True to cancel a running job. :param job_id: ID of previously submitted job. @@ -295,7 +295,7 @@

Source code for speechmatics.batch_client

             return False
[docs] def check_job_status(self, job_id: str) -> Dict[str, Any]: - """ + """ Check the status of a job. :param job_id: ID of previously submitted job. @@ -318,7 +318,7 @@

Source code for speechmatics.batch_client

 
[docs] def wait_for_completion( self, job_id: str, transcription_format: str = "txt" ) -> Union[str, Dict[str, Any]]: - """ + """ Blocks until job is complete, returning a transcript in the requested format. @@ -373,7 +373,7 @@

Source code for speechmatics.batch_client

     def _from_file(
         self, path: Union[str, os.PathLike], filetype: str
     ) -> Union[Dict[Any, Any], Tuple[str, bytes]]:
-        """Retrieve data from a file.
+        """Retrieve data from a file.
         For filetype=="json", returns a dict
         For filetype=="binary", returns a tuple of (filename, data)
         """
@@ -461,7 +461,7 @@ 

Quick search

| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12
diff --git a/docs/_modules/speechmatics/client.html b/docs/_modules/speechmatics/client.html index d113f62..727fbe3 100644 --- a/docs/_modules/speechmatics/client.html +++ b/docs/_modules/speechmatics/client.html @@ -64,7 +64,7 @@

Source code for speechmatics.client

 
 
 
[docs]class WebsocketClient: - """ + """ Manage a transcription session with the server. The best way to interact with this library is to instantiate this client @@ -100,7 +100,7 @@

Source code for speechmatics.client

         self._buffer_semaphore = asyncio.BoundedSemaphore
 
     async def _init_synchronization_primitives(self):
-        """
+        """
         Used to initialise synchronization primitives that require
         an event loop
         """
@@ -110,7 +110,7 @@ 

Source code for speechmatics.client

         )
 
     def _flag_recognition_started(self):
-        """
+        """
         Handle a
         :py:attr:`speechmatics.models.ClientMessageType.SetRecognitionConfig`
         message from the server.
@@ -120,7 +120,7 @@ 

Source code for speechmatics.client

         self._recognition_started.set()
 
     def _set_language_pack_info(self, language_pack_info: dict):
-        """
+        """
         Update the `language_pack_info` which is a subset of information from the
         manifest in the language pack which we expose to end users via the
         RecognitionStarted message.
@@ -128,7 +128,7 @@ 

Source code for speechmatics.client

         self._language_pack_info = language_pack_info
 
 
[docs] def get_language_pack_info(self) -> dict: - """ + """ Get the `language_pack_info` which is a subset of information from the manifest in the language pack which we expose to end users. @@ -139,7 +139,7 @@

Source code for speechmatics.client

 
     @json_utf8
     def _set_recognition_config(self):
-        """
+        """
         Constructs a
         :py:attr:`speechmatics.models.ClientMessageType.SetRecognitionConfig`
         message.
@@ -153,7 +153,7 @@ 

Source code for speechmatics.client

 
     @json_utf8
     def _start_recognition(self, audio_settings):
-        """
+        """
         Constructs a
         :py:attr:`speechmatics.models.ClientMessageType.StartRecognition`
         message.
@@ -174,7 +174,7 @@ 

Source code for speechmatics.client

 
     @json_utf8
     def _end_of_stream(self):
-        """
+        """
         Constructs an
         :py:attr:`speechmatics.models.ClientMessageType.EndOfStream`
         message.
@@ -185,7 +185,7 @@ 

Source code for speechmatics.client

         return msg
 
     def _consumer(self, message):
-        """
+        """
         Consumes messages and acts on them.
 
         :param message: Message received from the server.
@@ -222,7 +222,7 @@ 

Source code for speechmatics.client

             raise TranscriptionError(message["reason"])
 
     async def _producer(self, stream, audio_chunk_size):
-        """
+        """
         Yields messages to send to the server.
 
         :param stream: File-like object which an audio stream can be read from.
@@ -250,7 +250,7 @@ 

Source code for speechmatics.client

         yield self._end_of_stream()
 
     async def _consumer_handler(self):
-        """
+        """
         Controls the consumer loop for handling messages from the server.
 
         raises: ConnectionClosedError when the upstream closes unexpectedly
@@ -268,7 +268,7 @@ 

Source code for speechmatics.client

             self._consumer(message)
 
     async def _producer_handler(self, stream, audio_chunk_size):
-        """
+        """
         Controls the producer loop for sending messages to the server.
         """
         await self._recognition_started.wait()
@@ -284,7 +284,7 @@ 

Source code for speechmatics.client

                 return
 
     def _call_middleware(self, event_name, *args):
-        """
+        """
         Call the middlewares attached to the client for the given event name.
 
         :raises ForceEndSession: If this was raised by the user's middleware.
@@ -297,7 +297,7 @@ 

Source code for speechmatics.client

                 raise
 
 
[docs] def update_transcription_config(self, new_transcription_config): - """ + """ Updates the transcription config used for the session. This results in a SetRecognitionConfig message sent to the server. @@ -309,7 +309,7 @@

Source code for speechmatics.client

             self._transcription_config_needs_update = True
[docs] def add_event_handler(self, event_name, event_handler): - """ + """ Add an event handler (callback function) to handle an incoming message from the server. Event handlers are passed a copy of the incoming message from the server. If `event_name` is set to 'all' then @@ -348,7 +348,7 @@

Source code for speechmatics.client

             self.event_handlers[event_name].append(event_handler)
[docs] def add_middleware(self, event_name, middleware): - """ + """ Add a middleware to handle outgoing messages sent to the server. Middlewares are passed a reference to the outgoing message, which they may alter. @@ -383,7 +383,7 @@

Source code for speechmatics.client

             self.middlewares[event_name].append(middleware)
async def _communicate(self, stream, audio_settings): - """ + """ Create a producer/consumer for transcription messages and communicate with the server. Internal method called from _run. @@ -412,7 +412,7 @@

Source code for speechmatics.client

                 raise exc
 
 
[docs] async def run(self, stream, transcription_config, audio_settings): - """ + """ Begin a new recognition session. This will run asynchronously. Most callers may prefer to use :py:meth:`run_synchronously` which will block until the session is @@ -466,7 +466,7 @@

Source code for speechmatics.client

             self.websocket = None
[docs] def stop(self): - """ + """ Indicates that the recognition session should be forcefully stopped. Only used in conjunction with `run`. You probably don't need to call this if you're running the client via @@ -475,7 +475,7 @@

Source code for speechmatics.client

         self._session_needs_closing = True
[docs] def run_synchronously(self, *args, timeout=None, **kwargs): - """ + """ Run the transcription synchronously. :raises asyncio.TimeoutError: If the given timeout is exceeded. """ @@ -484,7 +484,7 @@

Source code for speechmatics.client

 
 
 async def _get_temp_token(api_key):
-    """
+    """
     Used to get a temporary token from management platform api for SaaS users
     """
     mp_api_url = os.getenv("SM_MANAGEMENT_PLATFORM_URL", "https://mp.speechmatics.com")
@@ -576,7 +576,7 @@ 

Quick search

| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12
diff --git a/docs/_modules/speechmatics/exceptions.html b/docs/_modules/speechmatics/exceptions.html index 4fffcba..4362565 100644 --- a/docs/_modules/speechmatics/exceptions.html +++ b/docs/_modules/speechmatics/exceptions.html @@ -38,26 +38,26 @@

Source code for speechmatics.exceptions

 
 
 
[docs]class TranscriptionError(Exception): - """ + """ Indicates an error in transcription. """
[docs]class EndOfTranscriptException(Exception): - """ + """ Indicates that the transcription session has finished. """
[docs]class ForceEndSession(Exception): - """ + """ Can be raised by the user from a middleware or event handler in order to force the transcription session to end early. """
[docs]class JobNotFoundException(Exception): - """ + """ Indicates that job ID was not found. """
@@ -138,7 +138,7 @@

Quick search

| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12
diff --git a/docs/_modules/speechmatics/helpers.html b/docs/_modules/speechmatics/helpers.html index f77446f..746f5ec 100644 --- a/docs/_modules/speechmatics/helpers.html +++ b/docs/_modules/speechmatics/helpers.html @@ -44,7 +44,7 @@

Source code for speechmatics.helpers

 
 
 
[docs]def del_none(dictionary): - """ + """ Recursively delete from the dictionary all entries which values are None. This function changes the input parameter in place. @@ -63,17 +63,17 @@

Source code for speechmatics.helpers

 
 
 
[docs]def json_utf8(func): - """A decorator to turn a function's return value into JSON""" + """A decorator to turn a function's return value into JSON""" def wrapper(*args, **kwargs): - """wrapper""" + """wrapper""" return json.dumps(func(*args, **kwargs)) return wrapper
[docs]async def read_in_chunks(stream, chunk_size): - """ + """ Utility method for reading in and yielding chunks :param stream: file-like object to read audio from @@ -106,7 +106,7 @@

Source code for speechmatics.helpers

 
 
 def _process_status_errors(error):
-    """
+    """
     Takes an httpx.HTTPStatusError and prints in a useful format for CLI
 
     :param error: the status error produced by the server for a request
@@ -215,7 +215,7 @@ 

Quick search

| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12
diff --git a/docs/_modules/speechmatics/models.html b/docs/_modules/speechmatics/models.html index 227b554..8702f97 100644 --- a/docs/_modules/speechmatics/models.html +++ b/docs/_modules/speechmatics/models.html @@ -47,13 +47,13 @@

Source code for speechmatics.models

 
 
[docs]@dataclass class FetchData: - """Batch: Optional configuration for fetching file for transcription.""" + """Batch: Optional configuration for fetching file for transcription.""" url: str - """URL to fetch""" + """URL to fetch""" auth_headers: str = None - """ + """ A list of additional headers to be added to the input fetch request when using http or https. This is intended to support authentication or authorization, for example by supplying an OAuth2 bearer token @@ -62,23 +62,23 @@

Source code for speechmatics.models

 
 
[docs]@dataclass class NotificationConfig: - """Batch: Optional configuration for callback notification.""" + """Batch: Optional configuration for callback notification.""" url: str - """URL for notification. The `id` and `status` query parameters will be added.""" + """URL for notification. The `id` and `status` query parameters will be added.""" contents: str = None - """ + """ Specifies a list of items to be attached to the notification message. When multiple items are requested, they are included as named file attachments. """ method: str = "post" - """The HTTP(S) method to be used. Only `post` and `put` are supported.""" + """The HTTP(S) method to be used. Only `post` and `put` are supported.""" auth_headers: str = None - """ + """ A list of additional headers to be added to the notification request when using http or https. This is intended to support authentication or authorization, for example by supplying an OAuth2 bearer token @@ -87,21 +87,21 @@

Source code for speechmatics.models

 
 
[docs]@dataclass class SRTOverrides: - """Batch: Optional configuration for SRT output.""" + """Batch: Optional configuration for SRT output.""" max_line_length: int = 37 - """Maximum count of characters per subtitle line including white space""" + """Maximum count of characters per subtitle line including white space""" max_lines: int = 2 - """Sets maximum count of lines in a subtitle section"""
+ """Sets maximum count of lines in a subtitle section"""
[docs]@dataclass class _TranscriptionConfig: # pylint: disable=too-many-instance-attributes - """Base model for defining transcription parameters.""" + """Base model for defining transcription parameters.""" def __init__(self, language=None, **kwargs): - """ + """ Ignores values which are not dataclass members when initialising. This allows **kwargs to contain fields which are not in the model, which is useful for reusing code to build RT and batch configs. @@ -119,114 +119,130 @@

Source code for speechmatics.models

                 setattr(self, key, value)
 
 
[docs] def asdict(self) -> Dict[Any, Any]: - """Returns model as a dict while excluding None values recursively.""" + """Returns model as a dict while excluding None values recursively.""" return asdict( self, dict_factory=lambda x: {k: v for (k, v) in x if v is not None} )
language: str = "en" - """ISO 639-1 language code. eg. `en`""" + """ISO 639-1 language code. eg. `en`""" operating_point: str = None - """Specifies which acoustic model to use.""" + """Specifies which acoustic model to use.""" output_locale: str = None - """RFC-5646 language code for transcript output. eg. `en-AU`""" + """RFC-5646 language code for transcript output. eg. `en-AU`""" diarization: str = None - """Indicates type of diarization to use, if any.""" + """Indicates type of diarization to use, if any.""" additional_vocab: dict = None - """Additional vocabulary that is not part of the standard language.""" + """Additional vocabulary that is not part of the standard language.""" punctuation_overrides: dict = None - """Permitted puctuation marks for advanced punctuation.""" + """Permitted puctuation marks for advanced punctuation.""" domain: str = None - """Optionally request a language pack optimized for a specific domain, + """Optionally request a language pack optimized for a specific domain, e.g. 'finance'""" enable_entities: bool = None - """Indicates if inverse text normalization entity output is enabled."""
+ """Indicates if inverse text normalization entity output is enabled."""
[docs]@dataclass class RTSpeakerDiarizationConfig: - """Real-time mode: Speaker diarization config.""" + """Real-time mode: Speaker diarization config.""" max_speakers: int = None - """This enforces the maximum number of speakers allowed in a single audio stream."""
+ """This enforces the maximum number of speakers allowed in a single audio stream."""
[docs]@dataclass class BatchSpeakerDiarizationConfig: - """Batch mode: Speaker diarization config.""" + """Batch mode: Speaker diarization config.""" - speaker_sensitivity: int = None - """The sensitivity of the speaker detection."""
+ speaker_sensitivity: float = None + """The sensitivity of the speaker detection. + This is a number between 0 and 1, where 0 means least sensitive and 1 means + most sensitive."""
[docs]@dataclass class BatchTranslationConfig: - """Batch mode: Translation config.""" + """Batch mode: Translation config.""" target_languages: List[str] = None - """Target languages for which translation should be produced"""
+ """Target languages for which translation should be produced"""
+ + +
[docs]@dataclass +class BatchLanguageIdentificationConfig: + """Batch mode: Language identification config.""" + + expected_languages: List[str] = None + """Expected languages for language identification"""
[docs]@dataclass(init=False) class TranscriptionConfig(_TranscriptionConfig): - """Real-time: Defines transcription parameters.""" + """Real-time: Defines transcription parameters.""" max_delay: float = None - """Maximum acceptable delay.""" + """Maximum acceptable delay.""" max_delay_mode: str = None - """Determines whether the threshold specified in max_delay can be exceeded + """Determines whether the threshold specified in max_delay can be exceeded if a potential entity is detected. Flexible means if a potential entity is detected, then the max_delay can be overriden until the end of that entity. Fixed means that max_delay specified ignores any potential entity that would not be completed within that threshold.""" speaker_diarization_config: RTSpeakerDiarizationConfig = None - """Configuration for speaker diarization.""" + """Configuration for speaker diarization.""" speaker_change_sensitivity: float = None - """Sensitivity level for speaker change.""" + """Sensitivity level for speaker change.""" enable_partials: bool = None - """Indicates if partial transcription, where words are produced + """Indicates if partial transcription, where words are produced immediately, is enabled. """
[docs]@dataclass(init=False) class BatchTranscriptionConfig(_TranscriptionConfig): - """Batch: Defines transcription parameters for batch requests. + """Batch: Defines transcription parameters for batch requests. The `.as_config()` method will return it wrapped into a Speechmatics json config.""" fetch_data: FetchData = None - """Optional configuration for fetching file for transcription.""" + """Optional configuration for fetching file for transcription.""" notification_config: NotificationConfig = None - """Optional configuration for callback notification.""" + """Optional configuration for callback notification.""" + + language_identification_config: BatchLanguageIdentificationConfig = None + """Optional configuration for language identification.""" translation_config: BatchTranslationConfig = None - """Optional configuration for translation.""" + """Optional configuration for translation.""" srt_overrides: SRTOverrides = None - """Optional configuration for SRT output.""" + """Optional configuration for SRT output.""" speaker_diarization_config: BatchSpeakerDiarizationConfig = None - """The sensitivity of the speaker detection.""" + """The sensitivity of the speaker detection.""" channel_diarization_labels: List[str] = None - """Add your own speaker or channel labels to the transcript""" + """Add your own speaker or channel labels to the transcript""" def as_config(self): dictionary = self.asdict() fetch_data = dictionary.pop("fetch_data", None) notification_config = dictionary.pop("notification_config", None) + language_identification_config = dictionary.pop( + "language_identification_config", None + ) translation_config = dictionary.pop("translation_config", None) srt_overrides = dictionary.pop("srt_overrides", None) @@ -240,6 +256,9 @@

Source code for speechmatics.models

                 notification_config = [notification_config]
             config["notification_config"] = notification_config
 
+        if language_identification_config:
+            config["language_identification_config"] = language_identification_config
+
         if translation_config:
             config["translation_config"] = translation_config
 
@@ -251,17 +270,17 @@ 

Source code for speechmatics.models

 
 
[docs]@dataclass class AudioSettings: - """Real-time: Defines audio parameters.""" + """Real-time: Defines audio parameters.""" encoding: str = None - """Encoding format when raw audio is used. Allowed values are + """Encoding format when raw audio is used. Allowed values are `pcm_f32le`, `pcm_s16le` and `mulaw`.""" sample_rate: int = 44100 - """Sampling rate in hertz.""" + """Sampling rate in hertz.""" chunk_size: int = 1024 * 4 - """Chunk size.""" + """Chunk size.""" def asdict(self): if not self.encoding: @@ -276,83 +295,83 @@

Source code for speechmatics.models

 
 
[docs]@dataclass class ConnectionSettings: - """Defines connection parameters.""" + """Defines connection parameters.""" url: str - """Websocket server endpoint.""" + """Websocket server endpoint.""" message_buffer_size: int = 512 - """Message buffer size in bytes.""" + """Message buffer size in bytes.""" ssl_context: ssl.SSLContext = field(default_factory=ssl.create_default_context) - """SSL context.""" + """SSL context.""" semaphore_timeout_seconds: float = 120 - """Semaphore timeout in seconds.""" + """Semaphore timeout in seconds.""" ping_timeout_seconds: float = 60 - """Ping-pong timeout in seconds.""" + """Ping-pong timeout in seconds.""" auth_token: str = None - """auth token to authenticate a customer. + """auth token to authenticate a customer. This auth token is only applicable for RT-SaaS.""" generate_temp_token: Optional[bool] = False - """Automatically generate a temporary token for authentication. + """Automatically generate a temporary token for authentication. Non-enterprise customers must set this to True. Enterprise customers should set this to False."""
[docs]class ClientMessageType(str, Enum): # pylint: disable=invalid-name - """Real-time: Defines various messages sent from client to server.""" + """Real-time: Defines various messages sent from client to server.""" StartRecognition = "StartRecognition" - """Initiates a recognition job based on configuration set previously.""" + """Initiates a recognition job based on configuration set previously.""" AddAudio = "AddAudio" - """Adds more audio data to the recognition job. The server confirms + """Adds more audio data to the recognition job. The server confirms receipt by sending an :py:attr:`ServerMessageType.AudioAdded` message.""" EndOfStream = "EndOfStream" - """Indicates that the client has no more audio to send.""" + """Indicates that the client has no more audio to send.""" SetRecognitionConfig = "SetRecognitionConfig" - """Allows the client to re-configure the recognition session."""
+ """Allows the client to re-configure the recognition session."""
[docs]class ServerMessageType(str, Enum): # pylint: disable=invalid-name - """Real-time: Defines various message types sent from server to client.""" + """Real-time: Defines various message types sent from server to client.""" RecognitionStarted = "RecognitionStarted" - """Server response to :py:attr:`ClientMessageType.StartRecognition`, + """Server response to :py:attr:`ClientMessageType.StartRecognition`, acknowledging that a recognition session has started.""" AudioAdded = "AudioAdded" - """Server response to :py:attr:`ClientMessageType.AddAudio`, indicating + """Server response to :py:attr:`ClientMessageType.AddAudio`, indicating that audio has been added successfully.""" AddPartialTranscript = "AddPartialTranscript" - """Indicates a partial transcript, which is an incomplete transcript that + """Indicates a partial transcript, which is an incomplete transcript that is immediately produced and may change as more context becomes available. """ AddTranscript = "AddTranscript" - """Indicates the final transcript of a part of the audio.""" + """Indicates the final transcript of a part of the audio.""" EndOfTranscript = "EndOfTranscript" - """Server response to :py:attr:`ClientMessageType.EndOfStream`, + """Server response to :py:attr:`ClientMessageType.EndOfStream`, after the server has finished sending all :py:attr:`AddTranscript` messages.""" Info = "Info" - """Indicates a generic info message.""" + """Indicates a generic info message.""" Warning = "Warning" - """Indicates a generic warning message.""" + """Indicates a generic warning message.""" Error = "Error" - """Indicates n generic error message."""
+ """Indicates a generic error message."""
@@ -431,7 +450,7 @@

Quick search

| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12
diff --git a/docs/_static/alabaster.css b/docs/_static/alabaster.css index 6476cd5..461b3ed 100644 --- a/docs/_static/alabaster.css +++ b/docs/_static/alabaster.css @@ -419,9 +419,7 @@ table.footnote td { } dl { - margin-left: 0; - margin-right: 0; - margin-top: 0; + margin: 0; padding: 0; } diff --git a/docs/batch_client.html b/docs/batch_client.html index 4472e05..d1fa45f 100644 --- a/docs/batch_client.html +++ b/docs/batch_client.html @@ -314,7 +314,7 @@

Quick search

| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12 | Sub-commands

Transcribe an audio file or stream in real time and output the results to the console.

speechmatics rt transcribe [-h] [--enable-partials] [--punctuation-sensitivity PUNCTUATION_SENSITIVITY] [--speaker-diarization-max-speakers SPEAKER_DIARIZATION_MAX_SPEAKERS]
-                           [--speaker-change-sensitivity SPEAKER_CHANGE_SENSITIVITY] [--speaker-change-token] [--max-delay MAX_DELAY] [--max-delay-mode {fixed,flexible}]
-                           [--raw ENCODING] [--sample-rate SAMPLE_RATE] [--chunk-size CHUNK_SIZE] [--buffer-size BUFFER_SIZE] [--print-json]
-                           [--diarization {none,speaker,speaker_change}] [--url URL] [--auth-token AUTH_TOKEN] [--ssl-mode {regular,insecure,none}] [--generate-temp-token]
-                           [--config-file CONFIG_FILE] [--lang LANGUAGE] [--operating-point {standard,enhanced}] [--domain DOMAIN] [--output-locale LOCALE]
-                           [--additional-vocab [ADDITIONAL_VOCAB [ADDITIONAL_VOCAB ...]]] [--additional-vocab-file VOCAB_FILEPATH]
-                           [--punctuation-permitted-marks PUNCTUATION_PERMITTED_MARKS] [--enable-entities] [--translation-langs TRANSLATION_TARGET_LANGUAGES]
+                           [--speaker-change-sensitivity SPEAKER_CHANGE_SENSITIVITY] [--speaker-change-token] [--max-delay MAX_DELAY] [--max-delay-mode {fixed,flexible}] [--raw ENCODING]
+                           [--sample-rate SAMPLE_RATE] [--chunk-size CHUNK_SIZE] [--buffer-size BUFFER_SIZE] [--print-json] [--diarization {none,speaker,speaker_change}] [--url URL]
+                           [--auth-token AUTH_TOKEN] [--ssl-mode {regular,insecure,none}] [--generate-temp-token] [--config-file CONFIG_FILE] [--lang LANGUAGE] [--operating-point {standard,enhanced}]
+                           [--domain DOMAIN] [--output-locale LOCALE] [--additional-vocab [ADDITIONAL_VOCAB [ADDITIONAL_VOCAB ...]]] [--additional-vocab-file VOCAB_FILEPATH]
+                           [--punctuation-permitted-marks PUNCTUATION_PERMITTED_MARKS] [--enable-entities] [--translation-target-langs TRANSLATION_TARGET_LANGUAGES]
                            FILEPATHS [FILEPATHS ...]
 
@@ -193,7 +192,7 @@
Named Arguments
speechmatics batch transcribe [-h] [--url URL] [--auth-token AUTH_TOKEN] [--ssl-mode {regular,insecure,none}] [--generate-temp-token] [--config-file CONFIG_FILE] [--lang LANGUAGE]
                               [--operating-point {standard,enhanced}] [--domain DOMAIN] [--output-locale LOCALE] [--additional-vocab [ADDITIONAL_VOCAB [ADDITIONAL_VOCAB ...]]]
                               [--additional-vocab-file VOCAB_FILEPATH] [--punctuation-permitted-marks PUNCTUATION_PERMITTED_MARKS] [--enable-entities]
-                              [--translation-langs TRANSLATION_TARGET_LANGUAGES] [--output-format {txt,json,json-v2,srt}]
-                              [--speaker-diarization-sensitivity SPEAKER_DIARIZATION_SENSITIVITY] [--diarization {none,speaker,channel,channel_and_speaker_change}]
-                              [--channel-diarization-labels CHANNEL_DIARIZATION_LABELS [CHANNEL_DIARIZATION_LABELS ...]]
+                              [--translation-target-langs TRANSLATION_TARGET_LANGUAGES] [--output-format {txt,json,json-v2,srt}] [--speaker-diarization-sensitivity SPEAKER_DIARIZATION_SENSITIVITY]
+                              [--diarization {none,speaker,channel,channel_and_speaker_change}] [--channel-diarization-labels CHANNEL_DIARIZATION_LABELS [CHANNEL_DIARIZATION_LABELS ...]]
                               FILEPATHS [FILEPATHS ...]
 
@@ -283,7 +281,7 @@
Named Arguments
speechmatics batch submit [-h] [--url URL] [--auth-token AUTH_TOKEN] [--ssl-mode {regular,insecure,none}] [--generate-temp-token] [--config-file CONFIG_FILE] [--lang LANGUAGE]
                           [--operating-point {standard,enhanced}] [--domain DOMAIN] [--output-locale LOCALE] [--additional-vocab [ADDITIONAL_VOCAB [ADDITIONAL_VOCAB ...]]]
-                          [--additional-vocab-file VOCAB_FILEPATH] [--punctuation-permitted-marks PUNCTUATION_PERMITTED_MARKS] [--enable-entities]
-                          [--translation-langs TRANSLATION_TARGET_LANGUAGES] [--output-format {txt,json,json-v2,srt}]
-                          [--speaker-diarization-sensitivity SPEAKER_DIARIZATION_SENSITIVITY] [--diarization {none,speaker,channel,channel_and_speaker_change}]
+                          [--additional-vocab-file VOCAB_FILEPATH] [--punctuation-permitted-marks PUNCTUATION_PERMITTED_MARKS] [--enable-entities] [--translation-target-langs TRANSLATION_TARGET_LANGUAGES]
+                          [--output-format {txt,json,json-v2,srt}] [--speaker-diarization-sensitivity SPEAKER_DIARIZATION_SENSITIVITY] [--diarization {none,speaker,channel,channel_and_speaker_change}]
                           [--channel-diarization-labels CHANNEL_DIARIZATION_LABELS [CHANNEL_DIARIZATION_LABELS ...]]
                           FILEPATHS [FILEPATHS ...]
 
@@ -370,7 +367,7 @@
Named Arguments
get-results

Retrieve results of a transcription job.

-
speechmatics batch get-results [-h] [--url URL] [--auth-token AUTH_TOKEN] [--ssl-mode {regular,insecure,none}] [--generate-temp-token] [--output-format {txt,json,json-v2,srt}]
-                               --job-id JOB_ID
+
speechmatics batch get-results [-h] [--url URL] [--auth-token AUTH_TOKEN] [--ssl-mode {regular,insecure,none}] [--generate-temp-token] [--output-format {txt,json,json-v2,srt}] --job-id JOB_ID
 
@@ -455,8 +451,7 @@
Named Arguments
delete

Delete the results of a transcription job.

-
speechmatics batch delete [-h] [--url URL] [--auth-token AUTH_TOKEN] [--ssl-mode {regular,insecure,none}] [--generate-temp-token] [--output-format {txt,json,json-v2,srt}] --job-id
-                          JOB_ID [--force]
+
speechmatics batch delete [-h] [--url URL] [--auth-token AUTH_TOKEN] [--ssl-mode {regular,insecure,none}] [--generate-temp-token] [--output-format {txt,json,json-v2,srt}] --job-id JOB_ID [--force]
 
@@ -526,12 +521,11 @@
Named Arguments

Real-time commands. RETAINED FOR LEGACY COMPATIBILITY.

speechmatics transcribe [-h] [--enable-partials] [--punctuation-sensitivity PUNCTUATION_SENSITIVITY] [--speaker-diarization-max-speakers SPEAKER_DIARIZATION_MAX_SPEAKERS]
-                        [--speaker-change-sensitivity SPEAKER_CHANGE_SENSITIVITY] [--speaker-change-token] [--max-delay MAX_DELAY] [--max-delay-mode {fixed,flexible}]
-                        [--raw ENCODING] [--sample-rate SAMPLE_RATE] [--chunk-size CHUNK_SIZE] [--buffer-size BUFFER_SIZE] [--print-json]
-                        [--diarization {none,speaker,speaker_change}] [--url URL] [--auth-token AUTH_TOKEN] [--ssl-mode {regular,insecure,none}] [--generate-temp-token]
-                        [--config-file CONFIG_FILE] [--lang LANGUAGE] [--operating-point {standard,enhanced}] [--domain DOMAIN] [--output-locale LOCALE]
-                        [--additional-vocab [ADDITIONAL_VOCAB [ADDITIONAL_VOCAB ...]]] [--additional-vocab-file VOCAB_FILEPATH]
-                        [--punctuation-permitted-marks PUNCTUATION_PERMITTED_MARKS] [--enable-entities] [--translation-langs TRANSLATION_TARGET_LANGUAGES]
+                        [--speaker-change-sensitivity SPEAKER_CHANGE_SENSITIVITY] [--speaker-change-token] [--max-delay MAX_DELAY] [--max-delay-mode {fixed,flexible}] [--raw ENCODING]
+                        [--sample-rate SAMPLE_RATE] [--chunk-size CHUNK_SIZE] [--buffer-size BUFFER_SIZE] [--print-json] [--diarization {none,speaker,speaker_change}] [--url URL] [--auth-token AUTH_TOKEN]
+                        [--ssl-mode {regular,insecure,none}] [--generate-temp-token] [--config-file CONFIG_FILE] [--lang LANGUAGE] [--operating-point {standard,enhanced}] [--domain DOMAIN]
+                        [--output-locale LOCALE] [--additional-vocab [ADDITIONAL_VOCAB [ADDITIONAL_VOCAB ...]]] [--additional-vocab-file VOCAB_FILEPATH]
+                        [--punctuation-permitted-marks PUNCTUATION_PERMITTED_MARKS] [--enable-entities] [--translation-target-langs TRANSLATION_TARGET_LANGUAGES]
                         FILEPATHS [FILEPATHS ...]
 
@@ -637,7 +631,7 @@

Named ArgumentsQuick search

| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12 | Quick search
| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12 | Quick search
| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12 | Quick search
| Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12 | B
  • BATCH_SELF_SERVICE_URL (in module speechmatics.constants)
  • BatchClient (class in speechmatics.batch_client) +
  • +
  • BatchLanguageIdentificationConfig (class in speechmatics.models)
  • - + @@ -233,6 +237,8 @@

    L

    @@ -537,7 +543,7 @@

    Quick search

    | Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12
    diff --git a/docs/helpers.html b/docs/helpers.html index 74a7a19..fb983b3 100644 --- a/docs/helpers.html +++ b/docs/helpers.html @@ -163,7 +163,7 @@

    Quick search

    | Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12 | Quick search | Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12 | +
    +
    +class speechmatics.models.BatchLanguageIdentificationConfig(expected_languages: Optional[List[str]] = None)[source]
    +

    Batch mode: Language identification config.

    +
    +
    +expected_languages: List[str] = None
    +

    Expected languages for language identification

    +
    + +
    +
    -class speechmatics.models.BatchSpeakerDiarizationConfig(speaker_sensitivity: Optional[int] = None)[source]
    +class speechmatics.models.BatchSpeakerDiarizationConfig(speaker_sensitivity: Optional[float] = None)[source]

    Batch mode: Speaker diarization config.

    -speaker_sensitivity: int = None
    -

    The sensitivity of the speaker detection.

    +speaker_sensitivity: float = None +

    The sensitivity of the speaker detection. +This is a number between 0 and 1, where 0 means least sensitive and 1 means +most sensitive.

    @@ -151,6 +165,12 @@

    Optional configuration for fetching file for transcription.

    +
    +
    +language_identification_config: speechmatics.models.BatchLanguageIdentificationConfig = None
    +

    Optional configuration for language identification.

    +
    +
    notification_config: speechmatics.models.NotificationConfig = None
    @@ -533,7 +553,7 @@

    Quick search

    | Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12 | Quick search | Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12
    diff --git a/docs/search.html b/docs/search.html index 7c6cc64..280e04d 100644 --- a/docs/search.html +++ b/docs/search.html @@ -131,7 +131,7 @@

    Related Topics

    | Powered by Sphinx 4.4.0 - & Alabaster 0.7.13 + & Alabaster 0.7.12
    diff --git a/docs/searchindex.js b/docs/searchindex.js index e6895e3..8efc16b 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["batch_client","cli_parser","client","constants","exceptions","helpers","index","models"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.viewcode":1,sphinx:56},filenames:["batch_client.rst","cli_parser.rst","client.rst","constants.rst","exceptions.rst","helpers.rst","index.rst","models.rst"],objects:{"speechmatics.batch_client":[[0,1,1,"","BatchClient"]],"speechmatics.batch_client.BatchClient":[[0,2,1,"","check_job_status"],[0,2,1,"","close"],[0,2,1,"","connect"],[0,2,1,"","delete_job"],[0,2,1,"","get_job_result"],[0,2,1,"","list_jobs"],[0,2,1,"","send_request"],[0,2,1,"","submit_job"],[0,2,1,"","wait_for_completion"]],"speechmatics.client":[[2,1,1,"","WebsocketClient"]],"speechmatics.client.WebsocketClient":[[2,2,1,"","add_event_handler"],[2,2,1,"","add_middleware"],[2,2,1,"","get_language_pack_info"],[2,2,1,"","run"],[2,2,1,"","run_synchronously"],[2,2,1,"","stop"],[2,2,1,"","update_transcription_config"]],"speechmatics.constants":[[3,3,1,"","BATCH_SELF_SERVICE_URL"],[3,3,1,"","RT_SELF_SERVICE_URL"]],"speechmatics.exceptions":[[4,4,1,"","EndOfTranscriptException"],[4,4,1,"","ForceEndSession"],[4,4,1,"","JobNotFoundException"],[4,4,1,"","TranscriptionError"]],"speechmatics.helpers":[[5,5,1,"","del_none"],[5,5,1,"","json_utf8"],[5,5,1,"","read_in_chunks"]],"speechmatics.models":[[7,1,1,"","AudioSettings"],[7,1,1,"","BatchSpeakerDiarizationConfig"],[7,1,1,"","BatchTranscriptionConfig"],[7,1,1,"","BatchTranslationConfig"],[7,1,1,"","ClientMessageType"],[7,1,1,"","ConnectionSettings"],[7,1,1,"","FetchData"],[7,1,1,"","NotificationConfig"],[7,1,1,"","RTSpeakerDiarizat
ionConfig"],[7,1,1,"","SRTOverrides"],[7,1,1,"","ServerMessageType"],[7,1,1,"","TranscriptionConfig"],[7,1,1,"","_TranscriptionConfig"]],"speechmatics.models.AudioSettings":[[7,6,1,"","chunk_size"],[7,6,1,"","encoding"],[7,6,1,"","sample_rate"]],"speechmatics.models.BatchSpeakerDiarizationConfig":[[7,6,1,"","speaker_sensitivity"]],"speechmatics.models.BatchTranscriptionConfig":[[7,6,1,"","channel_diarization_labels"],[7,6,1,"","fetch_data"],[7,6,1,"","notification_config"],[7,6,1,"","speaker_diarization_config"],[7,6,1,"","srt_overrides"],[7,6,1,"","translation_config"]],"speechmatics.models.BatchTranslationConfig":[[7,6,1,"","target_languages"]],"speechmatics.models.ClientMessageType":[[7,6,1,"","AddAudio"],[7,6,1,"","EndOfStream"],[7,6,1,"","SetRecognitionConfig"],[7,6,1,"","StartRecognition"]],"speechmatics.models.ConnectionSettings":[[7,6,1,"","auth_token"],[7,6,1,"","generate_temp_token"],[7,6,1,"","message_buffer_size"],[7,6,1,"","ping_timeout_seconds"],[7,6,1,"","semaphore_timeout_seconds"],[7,6,1,"","ssl_context"],[7,6,1,"","url"]],"speechmatics.models.FetchData":[[7,6,1,"","auth_headers"],[7,6,1,"","url"]],"speechmatics.models.NotificationConfig":[[7,6,1,"","auth_headers"],[7,6,1,"","contents"],[7,6,1,"","method"],[7,6,1,"","url"]],"speechmatics.models.RTSpeakerDiarizationConfig":[[7,6,1,"","max_speakers"]],"speechmatics.models.SRTOverrides":[[7,6,1,"","max_line_length"],[7,6,1,"","max_lines"]],"speechmatics.models.ServerMessageType":[[7,6,1,"","AddPartialTranscript"],[7,6,1,"","AddTranscript"],[7,6,1,"","AudioAdded"],[7,6,1,"","EndOfTranscript"],[7,6,1,"","Error"],[7,6,1,"","Info"],[7,6,1,"","RecognitionStarted"],[7,6,1,"","Warning"]],"speechmatics.models.TranscriptionConfig":[[7,6,1,"","enable_partials"],[7,6,1,"","max_delay"],[7,6,1,"","max_delay_mode"],[7,6,1,"","speaker_change_sensitivity"],[7,6,1,"","speaker_diarization_config"]],"speechmatics.models._TranscriptionConfig":[[7,6,1,"","additional_vocab"],[7,2,1,"","asdict"],[7,6,1,"","diarization"],[7,6
,1,"","domain"],[7,6,1,"","enable_entities"],[7,6,1,"","language"],[7,6,1,"","operating_point"],[7,6,1,"","output_locale"],[7,6,1,"","punctuation_overrides"]],speechmatics:[[0,0,0,"-","batch_client"],[2,0,0,"-","client"],[3,0,0,"-","constants"],[4,0,0,"-","exceptions"],[5,0,0,"-","helpers"],[7,0,0,"-","models"]]},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","data","Python data"],"4":["py","exception","Python exception"],"5":["py","function","Python function"],"6":["py","attribute","Python attribute"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:data","4":"py:exception","5":"py:function","6":"py:attribute"},terms:{"0":1,"1":[1,7],"100":[0,1],"12":1,"120":7,"168":1,"192":1,"2":[1,7],"20":1,"37":7,"404":0,"4096":[1,7],"44100":[1,7],"5":1,"512":[1,7],"5646":7,"60":7,"639":[1,7],"7":[0,1],"8":1,"9000":[1,2],"boolean":2,"byte":[0,1,5,7],"class":[0,1,2,7],"default":[0,1],"final":[1,7],"float":7,"function":[2,5],"import":6,"int":[5,7],"new":2,"return":[0,1,2,5,7],"true":[0,1,6,7],"while":[1,7],A:[2,5,7],FOR:1,For:[0,2],If:[1,2],It:0,The:[1,2,3,6,7],With:1,_transcriptionconfig:7,about:1,accept:[0,1,7],access:6,accur:1,acknowledg:7,acoust:[1,7],ad:[2,3,7],add:[1,2,6,7],add_event_handl:[2,6],add_middlewar:2,addaudio:[2,7],addit:[1,7],additional_vocab:[1,7],addpartialtranscript:[6,7],addtranscript:[2,6,7],advanc:[1,7],after:7,afterward:0,alia:0,all:[0,1,2,5,7],allow:[1,7],also:0,alter:2,an:[0,1,2,4,6,7],ani:[0,2,7],anymor:1,api:[0,1,2,3,6],appli:1,applianc:[0,1],applic:7,ar:[0,1,2,5,7],arg:2,argument:[2,6],as_config:7,asdict:7,asr:[0,1,2,3],associ:0,async:[2,5],asynchron:2,asyncio:2,asynciter:5,attach:7,au:7,audio:[0,1,2,5,6,7],audio_file_path:6,audio_set:2,audioad:7,audioset:[2,6,7],auth:[1,7],auth_head:7,auth_token:[0,1,6,7],authent:[1,7],author:[1,7],automat:[1,7],avail:[3,7],base:[2,7],batch:[0,3,6,7],batch_client:6,batch_self_service_url:3,batchclient:0,batchspeakerdiariza
tionconfig:7,batchtranscriptionconfig:[0,7],batchtranslationconfig:7,bearer:7,becom:7,been:[2,7],befor:1,begin:2,being:[2,6],below:6,best:2,binari:2,block:[0,2],bool:[0,2,7],buffer:[1,7],buffer_s:1,call:2,callabl:2,callback:[2,7],caller:2,can:[1,2,4,7],cancel:0,certif:1,chang:[1,5,7],channel:[1,7],channel_and_speaker_chang:1,channel_diarization_label:[1,7],charact:7,check:0,check_job_statu:0,choic:1,chunk:[1,5,7],chunk_siz:[1,5,7],clean:0,cli:6,client:[0,1,6,7],clientmessagetyp:7,close:0,code:[1,7],collect:5,com:[1,3,6],comma:1,compat:1,complet:[0,7],computation:1,conf:6,config:[0,2,7],config_fil:1,configur:[0,1,2,7],confirm:7,conjunct:2,connect:[0,2,7],connection_set:[0,2],connection_url:6,connectionset:[0,2,6,7],consol:1,constant:3,consum:2,content:[1,7],context:[0,1,7],copi:2,count:7,creat:[0,1,6],custom:[1,3,6,7],dai:[0,1],data:[1,2,5,7],de:1,debug:1,decor:5,def:6,defin:[6,7],del_non:5,delai:[1,7],delet:[0,5],delete_job:0,detect:[1,7],determin:7,diariz:[1,7],dict:[0,2,5,7],dictionari:5,directli:0,doc:2,doe:0,doesn:3,domain:[1,7],don:[2,6],e:[0,1,2,7],each:[1,5],earli:4,eg:[1,7],en:[1,2,6,7],enabl:[1,7],enable_ent:7,enable_parti:[6,7],encod:[1,7],encodingof:1,end:[2,3,4,7],endofstream:7,endoftranscript:7,endoftranscriptexcept:4,endpoint:[0,7],enforc:[1,7],enhanc:1,enterpris:[1,3,6,7],entiti:[1,7],entri:5,environ:1,error:[0,4,7],eu2:[3,6],event:[2,4,6],event_handl:[2,6],event_nam:[2,6],everi:2,exampl:[1,2,7],exceed:[2,7],except:[0,2,6],exclud:7,exist:0,expect:1,expens:1,expos:2,f:6,factori:7,fail:0,fals:[0,1,7],fetch:7,fetch_data:7,fetchdata:7,field:2,file:[0,1,2,5,6,7],filenam:0,filepath:1,financ:[1,7],finish:[2,4,7],first:2,fix:[1,7],flag:1,flexibl:[1,7],foracknowledg:1,forc:[0,1,4],forceendsess:4,forcefulli:2,format:[0,1,7],found:[0,4],fr:1,from:[1,2,4,5,7],full:6,func:5,g:[0,1,7],gener:[1,7],generate_temp_token:[1,6,7],get:2,get_job_result:0,get_language_pack_info:2,github:6,given:2,global:1,gnocchi:1,h:1,ha:[2,4,7],handl:[0,1,2],handler:[2,4,6],have:3,header:
7,helper:6,here:6,hertz:7,how:1,html:2,http:[0,1,2,3,7],httperror:0,httpx:0,hz:1,i:2,id:[0,1,4,7],ignor:7,illustr:6,immedi:7,impli:2,includ:[2,7],incom:2,incomplet:7,increas:1,index:6,indic:[1,2,4,7],info:[1,7],inform:[1,2],initi:7,input:[1,5,7],insecur:1,instanti:2,intend:7,interact:2,interfac:[0,2],interpret:1,inth:1,invalid:0,invers:7,io:[2,5],iobas:[2,5],iso:[1,7],item:7,job:[0,4,7],job_id:[0,1],jobnotfoundexcept:[0,4],json:[0,1,5,7],json_utf8:5,just:2,kwarg:[0,2,7],label1:1,label2:1,label:[1,7],lambda:2,lang:1,languag:[1,2,3,6,7],language_pack_info:2,larger:1,last:[0,1],latenc:1,later:1,latest:2,legaci:1,length:5,level:[1,7],librari:[0,2,3,4,5,7],like:[1,2,5],line:[1,7],list:[0,2,7],list_job:0,list_of_job:0,local:1,localhost:2,log:1,mai:[0,2,7],manag:[0,2],manifest:2,mark:[1,7],max:1,max_delai:[1,7],max_delay_mod:7,max_lin:7,max_line_length:7,max_sample_s:5,max_speak:7,maximum:[1,5,7],mean:7,messag:[1,2,7],message_buffer_s:7,metadata:6,method:[0,5,7],middl:1,middlewar:[2,4],min:1,mode:[6,7],model:[0,1,2,6],modul:6,more:[1,7],most:2,msg:[2,6],much:1,mulaw:7,multipl:[5,7],must:[0,1,7],n:7,name:[2,6,7],need:[2,6],new_transcription_config:2,nochi:1,nokei:1,non:[1,3,7],none:[0,1,2,5,7],normal:7,note:3,notif:7,notification_config:7,notificationconfig:7,number:[1,7],oauth2:7,object:[0,2,5],one:1,onli:[1,2,7],open:[0,6],oper:1,operating_point:7,optim:7,option:[0,1,7],order:4,os:0,other:[0,1],out:2,outgo:2,output:[1,5,7],output_local:7,overhead:1,overrid:1,overriden:7,own:[1,7],pack:[1,2,7],page:6,paramet:[0,2,5,6,7],part:[6,7],partial:[1,6,7],particular:2,pass:[0,2],path:[0,6],pathlik:0,pcm_f32le:[1,7],pcm_s16le:7,per:7,permit:[1,7],piec:1,ping:7,ping_timeout_second:7,place:5,plaintext:1,pleas:6,point:1,pong:7,pool:0,possibl:[1,2],post:7,potenti:7,pre:1,prefer:2,preset:1,previous:[0,7],print:[1,2,6],print_partial_transcript:6,print_transcript:6,probabl:2,process:[1,2],produc:[2,7],producer_consum:2,product:1,provid:[1,6],puctuat:7,punctuat:[1,7],punctuation_overrid:7,p
unctuation_permitted_mark:1,punctuation_sensit:1,put:7,queri:7,rais:[0,2,4,5],rate:[1,7],rather:1,raw:[1,7],rb:6,re:[2,7],read:[1,2,5],read_in_chunk:5,readm:6,readthedoc:2,real:[1,2,7],realtim:[3,6],receipt:7,receiv:[1,2],recognisedent:1,recognit:[2,7],recognitionstart:[2,7],recurs:[5,7],redirect:1,refer:2,regist:6,regular:1,remov:1,repres:1,request:[0,1,7],requir:[0,1],respect:1,respond:2,respons:[0,7],rest:0,result:[0,2],retain:1,retriev:1,rfc:7,rt:[3,6,7],rt_self_service_url:3,rtspeakerdiarizationconfig:7,run:[0,1,2],run_synchron:[2,6],s:[1,5,7],saa:[0,1,7],sampl:[1,7],sample_r:[1,7],sc:1,search:6,second:[2,7],section:7,see:6,select:1,self:[1,3],semaphor:7,semaphore_timeout_second:7,send:[0,1,7],send_request:0,sensit:[1,7],sent:[2,7],separ:1,sequenc:5,server:[1,2,7],servermessagetyp:[2,6,7],servic:3,session:[2,4,7],set:[0,2,6,7],setrecognitionconfig:[2,7],share:1,should:[1,2,7],show:1,sign:1,simpl:[1,2],singl:[1,7],size:[1,5,7],small:1,sound:1,sourc:[0,2,4,5,7],space:[1,7],speaker:[1,7],speaker_chang:1,speaker_change_sensit:[1,7],speaker_diarization_config:7,speaker_diarization_max_speak:1,speaker_diarization_sensit:1,speaker_sensit:7,special:1,specif:7,specifi:[1,7],speech:1,srt:[0,1,7],srt_overrid:7,srtoverrid:7,ssl:[1,7],ssl_context:7,sslcontext:7,standard:[1,7],start:7,startrecognit:7,statu:[0,7],stderr:1,stdout:1,still:0,stop:2,str:[0,2,7],stream:[1,2,5,7],sub:6,submit:0,submit_job:0,subset:2,subtitl:7,successfulli:7,suppli:7,support:7,symbol:1,synchron:2,t:[2,3,6],target:7,target_languag:7,task:2,temp:1,temporari:[1,7],termin:0,text:7,than:[0,1],thei:[2,7],thi:[0,1,2,5,6,7],threshold:7,time:[1,2,7],timeout:[2,7],timeouterror:2,token:[0,1,6,7],too:1,tool:6,transcrib:6,transcript:[0,1,2,4,6,7],transcription_config:[0,2],transcription_format:0,transcriptionconfig:[2,6,7],transcriptionerror:[0,4],translat:[1,7],translation_config:7,translation_target_languag:1,tupl:0,turn:5,txt:[0,1],type:[0,1,2,5,7],unbreak:1,union:0,unnecessari:1,unset:0,until:[0,2,7],up:0,up
dat:[1,2],update_transcription_config:2,url:[0,1,2,3,6,7],us:[0,1,2,3,4,5,7],usag:1,user:[2,4],util:5,v2:[0,1,2,3,6],v:1,valid:[0,1,2],valu:[1,5,7],valueerror:[2,5],variou:7,verbos:1,via:2,vocab:1,vocab_filepath:1,vocabulari:7,vv:1,wa:[1,4,5],wai:2,wait:1,wait_for_complet:0,warn:7,wav:6,waveform:6,we:[0,2],websocket:[1,2,7],websocketcli:[2,6],when:[0,1,2,7],where:[1,5,7],wherev:1,whether:[1,2,7],which:[0,1,2,5,7],white:7,wire:1,within:[0,1,7],word:7,work:1,would:7,wrap:7,wrapper:[0,2,6],ws:6,wss:[1,2,3,6],yet:2,yield:5,you:[0,1,2],your:[1,7]},titles:["speechmatics.batch_client","Speechmatics CLI Tool","speechmatics.client","speechmatics.client","speechmatics.exceptions","speechmatics.helpers","speechmatics-python","speechmatics.models"],titleterms:{argument:1,batch:1,batch_client:0,cli:1,client:[2,3],command:[1,6],config:1,delet:1,exampl:6,except:4,get:1,helper:5,indic:6,job:1,librari:6,line:6,list:1,mode:1,model:7,name:1,posit:1,python:6,refer:6,result:1,rt:1,set:1,speechmat:[0,1,2,3,4,5,6,7],statu:1,sub:1,submit:1,tabl:6,tool:1,transcrib:1,unset:1,usag:6}}) \ No newline at end of file 
+Search.setIndex({docnames:["batch_client","cli_parser","client","constants","exceptions","helpers","index","models"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.viewcode":1,sphinx:56},filenames:["batch_client.rst","cli_parser.rst","client.rst","constants.rst","exceptions.rst","helpers.rst","index.rst","models.rst"],objects:{"speechmatics.batch_client":[[0,1,1,"","BatchClient"]],"speechmatics.batch_client.BatchClient":[[0,2,1,"","check_job_status"],[0,2,1,"","close"],[0,2,1,"","connect"],[0,2,1,"","delete_job"],[0,2,1,"","get_job_result"],[0,2,1,"","list_jobs"],[0,2,1,"","send_request"],[0,2,1,"","submit_job"],[0,2,1,"","wait_for_completion"]],"speechmatics.client":[[2,1,1,"","WebsocketClient"]],"speechmatics.client.WebsocketClient":[[2,2,1,"","add_event_handler"],[2,2,1,"","add_middleware"],[2,2,1,"","get_language_pack_info"],[2,2,1,"","run"],[2,2,1,"","run_synchronously"],[2,2,1,"","stop"],[2,2,1,"","update_transcription_config"]],"speechmatics.constants":[[3,3,1,"","BATCH_SELF_SERVICE_URL"],[3,3,1,"","RT_SELF_SERVICE_URL"]],"speechmatics.exceptions":[[4,4,1,"","EndOfTranscriptException"],[4,4,1,"","ForceEndSession"],[4,4,1,"","JobNotFoundException"],[4,4,1,"","TranscriptionError"]],"speechmatics.helpers":[[5,5,1,"","del_none"],[5,5,1,"","json_utf8"],[5,5,1,"","read_in_chunks"]],"speechmatics.models":[[7,1,1,"","AudioSettings"],[7,1,1,"","BatchLanguageIdentificationConfig"],[7,1,1,"","BatchSpeakerDiarizationConfig"],[7,1,1,"","BatchTranscriptionConfig"],[7,1,1,"","BatchTranslationConfig"],[7,1,1,"","ClientMessageType"],[7,1,1,"","ConnectionSettings"],[7,1,1,"","FetchData"],[7,1,1,"","NotificationConfig"],[7,1,1,"","RTSpeakerDiarizationConfig"],[7,1,1,"","SRTOverrides"],[7,1,1,"","ServerMessageType"],[7,1,1,"","TranscriptionConfig"],[7,1
,1,"","_TranscriptionConfig"]],"speechmatics.models.AudioSettings":[[7,6,1,"","chunk_size"],[7,6,1,"","encoding"],[7,6,1,"","sample_rate"]],"speechmatics.models.BatchLanguageIdentificationConfig":[[7,6,1,"","expected_languages"]],"speechmatics.models.BatchSpeakerDiarizationConfig":[[7,6,1,"","speaker_sensitivity"]],"speechmatics.models.BatchTranscriptionConfig":[[7,6,1,"","channel_diarization_labels"],[7,6,1,"","fetch_data"],[7,6,1,"","language_identification_config"],[7,6,1,"","notification_config"],[7,6,1,"","speaker_diarization_config"],[7,6,1,"","srt_overrides"],[7,6,1,"","translation_config"]],"speechmatics.models.BatchTranslationConfig":[[7,6,1,"","target_languages"]],"speechmatics.models.ClientMessageType":[[7,6,1,"","AddAudio"],[7,6,1,"","EndOfStream"],[7,6,1,"","SetRecognitionConfig"],[7,6,1,"","StartRecognition"]],"speechmatics.models.ConnectionSettings":[[7,6,1,"","auth_token"],[7,6,1,"","generate_temp_token"],[7,6,1,"","message_buffer_size"],[7,6,1,"","ping_timeout_seconds"],[7,6,1,"","semaphore_timeout_seconds"],[7,6,1,"","ssl_context"],[7,6,1,"","url"]],"speechmatics.models.FetchData":[[7,6,1,"","auth_headers"],[7,6,1,"","url"]],"speechmatics.models.NotificationConfig":[[7,6,1,"","auth_headers"],[7,6,1,"","contents"],[7,6,1,"","method"],[7,6,1,"","url"]],"speechmatics.models.RTSpeakerDiarizationConfig":[[7,6,1,"","max_speakers"]],"speechmatics.models.SRTOverrides":[[7,6,1,"","max_line_length"],[7,6,1,"","max_lines"]],"speechmatics.models.ServerMessageType":[[7,6,1,"","AddPartialTranscript"],[7,6,1,"","AddTranscript"],[7,6,1,"","AudioAdded"],[7,6,1,"","EndOfTranscript"],[7,6,1,"","Error"],[7,6,1,"","Info"],[7,6,1,"","RecognitionStarted"],[7,6,1,"","Warning"]],"speechmatics.models.TranscriptionConfig":[[7,6,1,"","enable_partials"],[7,6,1,"","max_delay"],[7,6,1,"","max_delay_mode"],[7,6,1,"","speaker_change_sensitivity"],[7,6,1,"","speaker_diarization_config"]],"speechmatics.models._TranscriptionConfig":[[7,6,1,"","additional_vocab"],[7,2,1,"","asdict"],[
7,6,1,"","diarization"],[7,6,1,"","domain"],[7,6,1,"","enable_entities"],[7,6,1,"","language"],[7,6,1,"","operating_point"],[7,6,1,"","output_locale"],[7,6,1,"","punctuation_overrides"]],speechmatics:[[0,0,0,"-","batch_client"],[2,0,0,"-","client"],[3,0,0,"-","constants"],[4,0,0,"-","exceptions"],[5,0,0,"-","helpers"],[7,0,0,"-","models"]]},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","data","Python data"],"4":["py","exception","Python exception"],"5":["py","function","Python function"],"6":["py","attribute","Python attribute"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:data","4":"py:exception","5":"py:function","6":"py:attribute"},terms:{"0":[1,7],"1":[1,7],"100":[0,1],"12":1,"120":7,"168":1,"192":1,"2":[1,7],"20":1,"37":7,"404":0,"4096":[1,7],"44100":[1,7],"5":1,"512":[1,7],"5646":7,"60":7,"639":[1,7],"7":[0,1],"8":1,"9000":[1,2],"boolean":2,"byte":[0,1,5,7],"class":[0,1,2,7],"default":[0,1],"final":[1,7],"float":7,"function":[2,5],"import":6,"int":[5,7],"new":2,"return":[0,1,2,5,7],"true":[0,1,6,7],"while":[1,7],A:[2,5,7],FOR:1,For:[0,2],If:[1,2],It:0,The:[1,2,3,6,7],With:1,_transcriptionconfig:7,about:1,accept:[0,1,7],access:6,accur:1,acknowledg:7,acoust:[1,7],ad:[2,3,7],add:[1,2,6,7],add_event_handl:[2,6],add_middlewar:2,addaudio:[2,7],addit:[1,7],additional_vocab:[1,7],addpartialtranscript:[6,7],addtranscript:[2,6,7],advanc:[1,7],after:7,afterward:0,alia:0,all:[0,1,2,5,7],allow:[1,7],also:0,alter:2,an:[0,1,2,4,6,7],ani:[0,2,7],anymor:1,api:[0,1,2,3,6],appli:1,applianc:[0,1],applic:7,ar:[0,1,2,5,7],arg:2,argument:[2,6],as_config:7,asdict:7,asr:[0,1,2,3],associ:0,async:[2,5],asynchron:2,asyncio:2,asynciter:5,attach:7,au:7,audio:[0,1,2,5,6,7],audio_file_path:6,audio_set:2,audioad:7,audioset:[2,6,7],auth:[1,7],auth_head:7,auth_token:[0,1,6,7],authent:[1,7],author:[1,7],automat:[1,7],avail:[3,7],base:[2,7],batch:[0,3,6,7],batch_client:6,batch_self_service_url:3,b
atchclient:0,batchlanguageidentificationconfig:7,batchspeakerdiarizationconfig:7,batchtranscriptionconfig:[0,7],batchtranslationconfig:7,bearer:7,becom:7,been:[2,7],befor:1,begin:2,being:[2,6],below:6,best:2,between:7,binari:2,block:[0,2],bool:[0,2,7],buffer:[1,7],buffer_s:1,call:2,callabl:2,callback:[2,7],caller:2,can:[1,2,4,7],cancel:0,certif:1,chang:[1,5,7],channel:[1,7],channel_and_speaker_chang:1,channel_diarization_label:[1,7],charact:7,check:0,check_job_statu:0,choic:1,chunk:[1,5,7],chunk_siz:[1,5,7],clean:0,cli:6,client:[0,1,6,7],clientmessagetyp:7,close:0,code:[1,7],collect:5,com:[1,3,6],comma:1,compat:1,complet:[0,7],computation:1,conf:6,config:[0,2,7],config_fil:1,configur:[0,1,2,7],confirm:7,conjunct:2,connect:[0,2,7],connection_set:[0,2],connection_url:6,connectionset:[0,2,6,7],consol:1,constant:3,consum:2,content:[1,7],context:[0,1,7],copi:2,count:7,creat:[0,1,6],custom:[1,3,6,7],dai:[0,1],data:[1,2,5,7],de:1,debug:1,decor:5,def:6,defin:[6,7],del_non:5,delai:[1,7],delet:[0,5],delete_job:0,detect:[1,7],determin:7,diariz:[1,7],dict:[0,2,5,7],dictionari:5,directli:0,doc:2,doe:0,doesn:3,domain:[1,7],don:[2,6],e:[0,1,2,7],each:[1,5],earli:4,eg:[1,7],en:[1,2,6,7],enabl:[1,7],enable_ent:7,enable_parti:[6,7],encod:[1,7],encodingof:1,end:[2,3,4,7],endofstream:7,endoftranscript:7,endoftranscriptexcept:4,endpoint:[0,7],enforc:[1,7],enhanc:1,enterpris:[1,3,6,7],entiti:[1,7],entri:5,environ:1,error:[0,4,7],eu2:[3,6],event:[2,4,6],event_handl:[2,6],event_nam:[2,6],everi:2,exampl:[1,2,7],exceed:[2,7],except:[0,2,6],exclud:7,exist:0,expect:[1,7],expected_languag:7,expens:1,expos:2,f:6,factori:7,fail:0,fals:[0,1,7],fetch:7,fetch_data:7,fetchdata:7,field:2,file:[0,1,2,5,6,7],filenam:0,filepath:1,financ:[1,7],finish:[2,4,7],first:2,fix:[1,7],flag:1,flexibl:[1,7],foracknowledg:1,forc:[0,1,4],forceendsess:4,forcefulli:2,format:[0,1,7],found:[0,4],fr:1,from:[1,2,4,5,7],full:6,func:5,g:[0,1,7],gener:[1,7],generate_temp_token:[1,6,7],get:2,get_job_result:0,get_language_pack_i
nfo:2,github:6,given:2,global:1,gnocchi:1,h:1,ha:[2,4,7],handl:[0,1,2],handler:[2,4,6],have:3,header:7,helper:6,here:6,hertz:7,how:1,html:2,http:[0,1,2,3,7],httperror:0,httpx:0,hz:1,i:2,id:[0,1,4,7],identif:7,ignor:7,illustr:6,immedi:7,impli:2,includ:[2,7],incom:2,incomplet:7,increas:1,index:6,indic:[1,2,4,7],info:[1,7],inform:[1,2],initi:7,input:[1,5,7],insecur:1,instanti:2,intend:7,interact:2,interfac:[0,2],interpret:1,inth:1,invalid:0,invers:7,io:[2,5],iobas:[2,5],iso:[1,7],item:7,job:[0,4,7],job_id:[0,1],jobnotfoundexcept:[0,4],json:[0,1,5,7],json_utf8:5,just:2,kwarg:[0,2,7],label1:1,label2:1,label:[1,7],lambda:2,lang:1,languag:[1,2,3,6,7],language_identification_config:7,language_pack_info:2,larger:1,last:[0,1],latenc:1,later:1,latest:2,least:7,legaci:1,length:5,level:[1,7],librari:[0,2,3,4,5,7],like:[1,2,5],line:[1,7],list:[0,2,7],list_job:0,list_of_job:0,local:1,localhost:2,log:1,mai:[0,2,7],manag:[0,2],manifest:2,mark:[1,7],max:1,max_delai:[1,7],max_delay_mod:7,max_lin:7,max_line_length:7,max_sample_s:5,max_speak:7,maximum:[1,5,7],mean:7,messag:[1,2,7],message_buffer_s:7,metadata:6,method:[0,5,7],middl:1,middlewar:[2,4],min:1,mode:[6,7],model:[0,1,2,6],modul:6,more:[1,7],most:[2,7],msg:[2,6],much:1,mulaw:7,multipl:[5,7],must:[0,1,7],n:7,name:[2,6,7],need:[2,6],new_transcription_config:2,nochi:1,nokei:1,non:[1,3,7],none:[0,1,2,5,7],normal:7,note:3,notif:7,notification_config:7,notificationconfig:7,number:[1,7],oauth2:7,object:[0,2,5],one:1,onli:[1,2,7],open:[0,6],oper:1,operating_point:7,optim:7,option:[0,1,7],order:4,os:0,other:[0,1],out:2,outgo:2,output:[1,5,7],output_local:7,overhead:1,overrid:1,overriden:7,own:[1,7],pack:[1,2,7],page:6,paramet:[0,2,5,6,7],part:[6,7],partial:[1,6,7],particular:2,pass:[0,2],path:[0,6],pathlik:0,pcm_f32le:[1,7],pcm_s16le:7,per:7,permit:[1,7],piec:1,ping:7,ping_timeout_second:7,place:5,plaintext:1,pleas:6,point:1,pong:7,pool:0,possibl:[1,2],post:7,potenti:7,pre:1,prefer:2,preset:1,previous:[0,7],print:[1,2,6],print_partial_tr
anscript:6,print_transcript:6,probabl:2,process:[1,2],produc:[2,7],producer_consum:2,product:1,provid:[1,6],puctuat:7,punctuat:[1,7],punctuation_overrid:7,punctuation_permitted_mark:1,punctuation_sensit:1,put:7,queri:7,rais:[0,2,4,5],rate:[1,7],rather:1,raw:[1,7],rb:6,re:[2,7],read:[1,2,5],read_in_chunk:5,readm:6,readthedoc:2,real:[1,2,7],realtim:[3,6],receipt:7,receiv:[1,2],recognisedent:1,recognit:[2,7],recognitionstart:[2,7],recurs:[5,7],redirect:1,refer:2,regist:6,regular:1,remov:1,repres:1,request:[0,1,7],requir:[0,1],respect:1,respond:2,respons:[0,7],rest:0,result:[0,2],retain:1,retriev:1,rfc:7,rt:[3,6,7],rt_self_service_url:3,rtspeakerdiarizationconfig:7,run:[0,1,2],run_synchron:[2,6],s:[1,5,7],saa:[0,1,7],sampl:[1,7],sample_r:[1,7],sc:1,search:6,second:[2,7],section:7,see:6,select:1,self:[1,3],semaphor:7,semaphore_timeout_second:7,send:[0,1,7],send_request:0,sensit:[1,7],sent:[2,7],separ:1,sequenc:5,server:[1,2,7],servermessagetyp:[2,6,7],servic:3,session:[2,4,7],set:[0,2,6,7],setrecognitionconfig:[2,7],share:1,should:[1,2,7],show:1,sign:1,simpl:[1,2],singl:[1,7],size:[1,5,7],small:1,sound:1,sourc:[0,2,4,5,7],space:[1,7],speaker:[1,7],speaker_chang:1,speaker_change_sensit:[1,7],speaker_diarization_config:7,speaker_diarization_max_speak:1,speaker_diarization_sensit:1,speaker_sensit:7,special:1,specif:7,specifi:[1,7],speech:1,srt:[0,1,7],srt_overrid:7,srtoverrid:7,ssl:[1,7],ssl_context:7,sslcontext:7,standard:[1,7],start:7,startrecognit:7,statu:[0,7],stderr:1,stdout:1,still:0,stop:2,str:[0,2,7],stream:[1,2,5,7],sub:6,submit:0,submit_job:0,subset:2,subtitl:7,successfulli:7,suppli:7,support:7,symbol:1,synchron:2,t:[2,3,6],target:[1,7],target_languag:7,task:2,temp:1,temporari:[1,7],termin:0,text:7,than:[0,1],thei:[2,7],thi:[0,1,2,5,6,7],threshold:7,time:[1,2,7],timeout:[2,7],timeouterror:2,token:[0,1,6,7],too:1,tool:6,transcrib:6,transcript:[0,1,2,4,6,7],transcription_config:[0,2],transcription_format:0,transcriptionconfig:[2,6,7],transcriptionerror:[0,4],transla
t:[1,7],translation_config:7,translation_target_languag:1,tupl:0,turn:5,txt:[0,1],type:[0,1,2,5,7],unbreak:1,union:0,unnecessari:1,unset:0,until:[0,2,7],up:0,updat:[1,2],update_transcription_config:2,url:[0,1,2,3,6,7],us:[0,1,2,3,4,5,7],usag:1,user:[2,4],util:5,v2:[0,1,2,3,6],v:1,valid:[0,1,2],valu:[1,5,7],valueerror:[2,5],variou:7,verbos:1,via:2,vocab:1,vocab_filepath:1,vocabulari:7,vv:1,wa:[1,4,5],wai:2,wait:1,wait_for_complet:0,warn:7,wav:6,waveform:6,we:[0,2],websocket:[1,2,7],websocketcli:[2,6],when:[0,1,2,7],where:[1,5,7],wherev:1,whether:[1,2,7],which:[0,1,2,5,7],white:7,wire:1,within:[0,1,7],word:7,work:1,would:7,wrap:7,wrapper:[0,2,6],ws:6,wss:[1,2,3,6],yet:2,yield:5,you:[0,1,2],your:[1,7]},titles:["speechmatics.batch_client","Speechmatics CLI Tool","speechmatics.client","speechmatics.client","speechmatics.exceptions","speechmatics.helpers","speechmatics-python","speechmatics.models"],titleterms:{argument:1,batch:1,batch_client:0,cli:1,client:[2,3],command:[1,6],config:1,delet:1,exampl:6,except:4,get:1,helper:5,indic:6,job:1,librari:6,line:6,list:1,mode:1,model:7,name:1,posit:1,python:6,refer:6,result:1,rt:1,set:1,speechmat:[0,1,2,3,4,5,6,7],statu:1,sub:1,submit:1,tabl:6,tool:1,transcrib:1,unset:1,usag:6}}) \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt index fc5bdbb..490a553 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,3 +9,4 @@ sphinx==4.4.0 pytest-cov==3.0.0 black==22.3.0 sphinx-argparse==0.4.0 +pytest_httpx==0.21.3 \ No newline at end of file diff --git a/speechmatics/cli.py b/speechmatics/cli.py index 2901652..057c6a7 100755 --- a/speechmatics/cli.py +++ b/speechmatics/cli.py @@ -36,6 +36,7 @@ BatchSpeakerDiarizationConfig, RTSpeakerDiarizationConfig, BatchTranslationConfig, + BatchLanguageIdentificationConfig, ) from speechmatics.cli_parser import ( parse_args, @@ -198,6 +199,11 @@ def get_transcription_config(args): # pylint: disable=too-many-branches # Ensure "en" is the default language 
as to not break existing API behavior. config = {"language": "en"} + # transcription_config is flattened in the BatchTranscriptionConfig, + # so the config entry from JSON must be flattened here, otherwise the JSON entry would be ignored + if config.get("transcription_config"): + config.update(config.pop("transcription_config")) + # Explicit command line arguments override values from config file. for option in [ "language", @@ -267,6 +273,12 @@ def get_transcription_config(args): # pylint: disable=too-many-branches target_languages=translation_target_languages.split(",") ) + if args.get("langid_expected_languages") is not None: + langid_expected_languages = args.get("langid_expected_languages") + config["language_identification_config"] = BatchLanguageIdentificationConfig( + expected_languages=langid_expected_languages.split(",") + ) + if args["mode"] == "rt": # pylint: disable=unexpected-keyword-arg return TranscriptionConfig(**config) diff --git a/speechmatics/cli_parser.py b/speechmatics/cli_parser.py index d9644c3..b9343f7 100644 --- a/speechmatics/cli_parser.py +++ b/speechmatics/cli_parser.py @@ -216,6 +216,14 @@ def get_arg_parser(): default=None, help=("Comma-separated list of languages to translate the transcription into"), ) + config_parser.add_argument( + "--langid-langs", + "--langid-languages", + dest="langid_expected_languages", + type=str, + default=None, + help=("Comma-separated list of expected languages for language identification"), + ) # Parent parser for output type output_format_parser = argparse.ArgumentParser(add_help=False) diff --git a/speechmatics/models.py b/speechmatics/models.py index 3b6461e..178e32e 100644 --- a/speechmatics/models.py +++ b/speechmatics/models.py @@ -143,6 +143,14 @@ class BatchTranslationConfig: """Target languages for which translation should be produced""" +@dataclass +class BatchLanguageIdentificationConfig: + """Batch mode: Language identification config.""" + + expected_languages: List[str] = None + 
"""Expected languages for language identification""" + + @dataclass(init=False) class TranscriptionConfig(_TranscriptionConfig): """Real-time: Defines transcription parameters.""" @@ -179,6 +187,9 @@ class BatchTranscriptionConfig(_TranscriptionConfig): notification_config: NotificationConfig = None """Optional configuration for callback notification.""" + language_identification_config: BatchLanguageIdentificationConfig = None + """Optional configuration for language identification.""" + translation_config: BatchTranslationConfig = None """Optional configuration for translation.""" @@ -196,6 +207,9 @@ def as_config(self): fetch_data = dictionary.pop("fetch_data", None) notification_config = dictionary.pop("notification_config", None) + language_identification_config = dictionary.pop( + "language_identification_config", None + ) translation_config = dictionary.pop("translation_config", None) srt_overrides = dictionary.pop("srt_overrides", None) @@ -209,6 +223,9 @@ def as_config(self): notification_config = [notification_config] config["notification_config"] = notification_config + if language_identification_config: + config["language_identification_config"] = language_identification_config + if translation_config: config["translation_config"] = translation_config diff --git a/tests/data/batch-job-status.json b/tests/data/batch-job-status.json new file mode 100644 index 0000000..a13e9b0 --- /dev/null +++ b/tests/data/batch-job-status.json @@ -0,0 +1,18 @@ +{ + "job": { + "config": { + "fetch_data": { + "url": "https://smtooling.blob.core.windows.net/short-files/15sec.wav" + }, + "transcription_config": { + "language": "en" + }, + "type": "transcription" + }, + "created_at": "2023-02-22T14:33:02.221Z", + "data_name": "", + "duration": 15, + "id": "p8t3dcrign", + "status": "done" + } +} \ No newline at end of file diff --git a/tests/data/batch-job-transcript.json b/tests/data/batch-job-transcript.json new file mode 100644 index 0000000..c398a36 --- /dev/null +++ 
b/tests/data/batch-job-transcript.json @@ -0,0 +1,321 @@ +{ + "format": "2.8", + "job": { + "created_at": "2023-02-22T14:33:02.221Z", + "data_name": "", + "duration": 15, + "id": "p8t3dcrign" + }, + "metadata": { + "created_at": "2023-02-22T14:33:02.221Z", + "language_pack_info": { + "adapted": false, + "itn": true, + "language_description": "English", + "word_delimiter": "", + "writing_direction": "left-to-right" + }, + "transcription_config": { + "language": "en" + }, + "type": "transcription" + }, + "results": [ + { + "alternatives": [ + { + "confidence": 1.0, + "content": "Tidy", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 1.83, + "start_time": 1.41, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": "things", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 2.1, + "start_time": 1.83, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": "up", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 2.37, + "start_time": 2.1, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": ".", + "language": "en", + "speaker": "UU" + } + ], + "attaches_to": "previous", + "end_time": 2.37, + "is_eos": true, + "start_time": 2.37, + "type": "punctuation" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": "Place", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 3.75, + "start_time": 3.42, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 0.87, + "content": "your", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 3.84, + "start_time": 3.75, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 0.87, + "content": "shelves", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 4.23, + "start_time": 3.84, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": "on", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 4.35, + "start_time": 4.23, 
+ "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": "the", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 4.44, + "start_time": 4.35, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 0.98, + "content": "rungs", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 5.01, + "start_time": 4.44, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": ".", + "language": "en", + "speaker": "UU" + } + ], + "attaches_to": "previous", + "end_time": 5.01, + "is_eos": true, + "start_time": 5.01, + "type": "punctuation" + }, + { + "alternatives": [ + { + "confidence": 0.51, + "content": "Ensure", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 6.6, + "start_time": 6.13, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 0.54, + "content": "the", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 6.69, + "start_time": 6.6, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": ".", + "language": "en", + "speaker": "UU" + } + ], + "attaches_to": "previous", + "end_time": 6.69, + "is_eos": true, + "start_time": 6.69, + "type": "punctuation" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": "Fill", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 8.91, + "start_time": 8.49, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": "with", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 9.09, + "start_time": 8.91, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": "your", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 9.21, + "start_time": 9.09, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 0.6, + "content": "favourite", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 9.84, + "start_time": 9.21, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + 
"content": "things", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 10.47, + "start_time": 9.84, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": ".", + "language": "en", + "speaker": "UU" + } + ], + "attaches_to": "previous", + "end_time": 10.47, + "is_eos": true, + "start_time": 10.47, + "type": "punctuation" + }, + { + "alternatives": [ + { + "confidence": 0.26, + "content": "Whyalla", + "language": "en", + "speaker": "UU" + } + ], + "end_time": 12.3, + "start_time": 11.74, + "type": "word" + }, + { + "alternatives": [ + { + "confidence": 1.0, + "content": ".", + "language": "en", + "speaker": "UU" + } + ], + "attaches_to": "previous", + "end_time": 12.3, + "is_eos": true, + "start_time": 12.3, + "type": "punctuation" + } + ] +} diff --git a/tests/test_cli.py b/tests/test_cli.py index cef11dc..ee2fa8b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -234,6 +234,16 @@ "output_format": "json-v2", }, ), + ( + [ + "batch", + "submit", + "--langid-langs=de,es,cs", + ], + { + "langid_expected_languages": "de,es,cs", + }, + ), ], ) def test_cli_arg_parse_with_file(args, values): diff --git a/tests/test_client.py b/tests/test_client.py index aa9c834..ab86416 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -9,14 +9,18 @@ import asynctest import pytest +from pytest_httpx import HTTPXMock from speechmatics import client +from speechmatics.batch_client import BatchClient from speechmatics.exceptions import ForceEndSession from speechmatics.models import ( ConnectionSettings, ServerMessageType, ClientMessageType, RTSpeakerDiarizationConfig, + BatchTranscriptionConfig, ) + from tests.utils import path_to_test_resource, default_ws_client_setup @@ -537,6 +541,41 @@ def test_language_pack_info_is_stored(mock_server): assert info["language_code"] == "en" +def test_batch_mock_jobs(httpx_mock: HTTPXMock): + # submit job + httpx_mock.add_response(content=b'{"id":"p8t3dcrign"}') + + # check job status + with 
open(path_to_test_resource("batch-job-status.json"), "rb") as file: + job_status = file.read() + httpx_mock.add_response(content=job_status) + + # get job result + with open(path_to_test_resource("batch-job-transcript.json"), "rb") as file: + transcript = file.read() + httpx_mock.add_response(content=transcript) + + settings = ConnectionSettings( + url="https://speechmatics.com/foo/v2", + auth_token="bar", + ) + + conf = BatchTranscriptionConfig( + language="en", + ) + + with BatchClient(settings) as batch_client: + job_id = batch_client.submit_job( + audio=("foo", b"some audio data"), + transcription_config=conf, + ) + + actual_transcript = batch_client.wait_for_completion( + job_id, transcription_format="txt" + ) + assert transcript.decode("utf-8") == actual_transcript + + def deepcopy_state(obj): """ Return a deepcopy of the __dict__ (or state) of an object but ignore