diff --git a/clients/python/llmengine/__init__.py b/clients/python/llmengine/__init__.py index 15b836da4..b2ea471a0 100644 --- a/clients/python/llmengine/__init__.py +++ b/clients/python/llmengine/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.0.0b33" +__version__ = "0.0.0b34" import os from typing import Sequence diff --git a/clients/python/llmengine/completion.py b/clients/python/llmengine/completion.py index 01aa86a94..4cbbaf751 100644 --- a/clients/python/llmengine/completion.py +++ b/clients/python/llmengine/completion.py @@ -47,6 +47,7 @@ async def acreate( guided_json: Optional[Dict[str, Any]] = None, guided_regex: Optional[str] = None, guided_choice: Optional[List[str]] = None, + guided_grammar: Optional[str] = None, timeout: int = COMPLETION_TIMEOUT, stream: bool = False, ) -> Union[CompletionSyncResponse, AsyncIterable[CompletionStreamResponse]]: @@ -118,6 +119,9 @@ async def acreate( guided_choice (Optional[List[str]]): If specified, the output will be exactly one of the choices. + guided_grammar (Optional[str]): + If specified, the output will follow the context-free grammar provided. + timeout (int): Timeout in seconds. This is the maximum amount of time you are willing to wait for a response. @@ -218,6 +222,7 @@ async def _acreate_stream( guided_json=guided_json, guided_regex=guided_regex, guided_choice=guided_choice, + guided_grammar=guided_grammar, timeout=timeout, ) @@ -242,6 +247,11 @@ async def _acreate_sync(**kwargs) -> CompletionSyncResponse: frequency_penalty=frequency_penalty, top_k=top_k, top_p=top_p, + include_stop_str_in_output=include_stop_str_in_output, + guided_json=guided_json, + guided_regex=guided_regex, + guided_choice=guided_choice, + guided_grammar=guided_grammar, ) @classmethod @@ -261,6 +271,7 @@ def create( guided_json: Optional[Dict[str, Any]] = None, guided_regex: Optional[str] = None, guided_choice: Optional[List[str]] = None, + guided_grammar: Optional[str] = None, timeout: int = COMPLETION_TIMEOUT, stream: bool = False, ) -> Union[CompletionSyncResponse, Iterator[CompletionStreamResponse]]: @@ -333,6 +344,9 @@ def create( guided_choice (Optional[List[str]]): If specified, the output will be exactly one of the choices. + guided_grammar (Optional[str]): + If specified, the output will follow the context-free grammar provided. + timeout (int): Timeout in seconds. This is the maximum amount of time you are willing to wait for a response. @@ -419,6 +433,11 @@ def _create_stream(**kwargs): frequency_penalty=frequency_penalty, top_k=top_k, top_p=top_p, + include_stop_str_in_output=include_stop_str_in_output, + guided_json=guided_json, + guided_regex=guided_regex, + guided_choice=guided_choice, + guided_grammar=guided_grammar, ) else: @@ -436,6 +455,7 @@ def _create_stream(**kwargs): guided_json=guided_json, guided_regex=guided_regex, guided_choice=guided_choice, + guided_grammar=guided_grammar, ).dict() response = cls.post_sync( resource_name=f"v1/llm/completions-sync?model_endpoint_name={model}", diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py index bcaf41129..f1c9b56c9 100644 --- a/clients/python/llmengine/data_types.py +++ b/clients/python/llmengine/data_types.py @@ -331,6 +331,7 @@ class CompletionSyncV1Request(BaseModel): guided_json: Optional[Dict[str, Any]] = Field(default=None) guided_regex: Optional[str] = Field(default=None) guided_choice: Optional[List[str]] = Field(default=None) + guided_grammar: Optional[str] = Field(default=None) class TokenOutput(BaseModel): @@ -405,6 +406,7 @@ class CompletionStreamV1Request(BaseModel): guided_json: Optional[Dict[str, Any]] = Field(default=None) guided_regex: Optional[str] = Field(default=None) guided_choice: Optional[List[str]] = Field(default=None) + guided_grammar: Optional[str] = Field(default=None) class CompletionStreamOutput(BaseModel): diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 7d645b53d..910fa162d 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scale-llm-engine" -version = "0.0.0.beta33" +version = "0.0.0.beta34" description = "Scale LLM Engine Python client" license = "Apache-2.0" authors = ["Phil Chen "] diff --git a/clients/python/setup.py b/clients/python/setup.py index c11111cfd..c8d30e11a 100644 --- a/clients/python/setup.py +++ b/clients/python/setup.py @@ -3,7 +3,7 @@ setup( name="scale-llm-engine", python_requires=">=3.7", - version="0.0.0.beta33", + version="0.0.0.beta34", packages=find_packages(), package_data={"llmengine": ["py.typed"]}, )