From 221939f7d26093589c987b669274f4508f7401ac Mon Sep 17 00:00:00 2001 From: Arno Date: Sat, 5 Oct 2024 12:49:17 +0800 Subject: [PATCH] use ensure_ascii=False in json.dumps to fix dump non-ascii character ensure_ascii=False --- src/ell/lmp/_track.py | 2 +- src/ell/lmp/tool.py | 2 +- src/ell/providers/anthropic.py | 2 +- src/ell/providers/bedrock.py | 2 +- src/ell/providers/openai.py | 2 +- src/ell/stores/sql.py | 2 +- src/ell/types/studio.py | 4 +++- src/ell/util/serialization.py | 6 +++--- x/openai_realtime/src/openai_realtime/api.py | 2 +- x/openai_realtime/src/openai_realtime/client.py | 4 ++-- 10 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/ell/lmp/_track.py b/src/ell/lmp/_track.py index 4d2ef43a..0ea8674e 100644 --- a/src/ell/lmp/_track.py +++ b/src/ell/lmp/_track.py @@ -219,7 +219,7 @@ def _write_invocation(func, invocation_id, latency_ms, prompt_tokens, completion # Write to the blob store blob_id = config.store.blob_store.store_blob( json.dumps(invocation_contents.model_dump( - ), default=str).encode('utf-8'), + ), default=str, ensure_ascii=False).encode('utf-8'), invocation_id ) invocation_contents = InvocationContents( diff --git a/src/ell/lmp/tool.py b/src/ell/lmp/tool.py index a2be916a..ccdaa8d0 100644 --- a/src/ell/lmp/tool.py +++ b/src/ell/lmp/tool.py @@ -54,7 +54,7 @@ def wrapper( elif isinstance(result, list) and all(isinstance(c, ContentBlock) for c in result): content_results = result else: - content_results = [ContentBlock(text=_lstr(json.dumps(result),origin_trace=_invocation_origin))] + content_results = [ContentBlock(text=_lstr(json.dumps(result, ensure_ascii=False),origin_trace=_invocation_origin))] except TypeError as e: raise TypeError(f"Failed to convert tool use result to ContentBlock: {e}. Tools must return json serializable objects. or a list of ContentBlocks.") # XXX: Need to support images and other content types somehow. We should look for images inside of the the result and then go from there. diff --git a/src/ell/providers/anthropic.py b/src/ell/providers/anthropic.py index a4ce2816..2002f0ea 100644 --- a/src/ell/providers/anthropic.py +++ b/src/ell/providers/anthropic.py @@ -188,7 +188,7 @@ def _content_block_to_anthropic_format(content_block: ContentBlock): if (image := content_block.image): return serialize_image_for_anthropic(image) elif ((text := content_block.text) is not None): return dict(type="text", text=text) elif (parsed := content_block.parsed): - return dict(type="text", text=json.dumps(parsed.model_dump())) + return dict(type="text", text=json.dumps(parsed.model_dump(), ensure_ascii=False)) elif (tool_call := content_block.tool_call): return dict( type="tool_use", diff --git a/src/ell/providers/bedrock.py b/src/ell/providers/bedrock.py index 08f481c0..77bc57c9 100644 --- a/src/ell/providers/bedrock.py +++ b/src/ell/providers/bedrock.py @@ -198,7 +198,7 @@ def content_block_to_bedrock_format(content_block: ContentBlock) -> Dict[str, An elif content_block.parsed: return { "type": "text", - "text": json.dumps(content_block.parsed.model_dump()) + "text": json.dumps(content_block.parsed.model_dump(), ensure_ascii=False) } elif content_block.tool_call: return { diff --git a/src/ell/providers/openai.py b/src/ell/providers/openai.py index 10db5fac..4f99ccfa 100644 --- a/src/ell/providers/openai.py +++ b/src/ell/providers/openai.py @@ -64,7 +64,7 @@ def translate_to_provider(self, ell_call : EllCallParams) -> Dict[str, Any]: type="function", function=dict( name=tool_call.tool.__name__, - arguments=json.dumps(tool_call.params.model_dump()) + arguments=json.dumps(tool_call.params.model_dump(), ensure_ascii=False) ) ) for tool_call in tool_calls ], role="assistant", diff --git a/src/ell/stores/sql.py b/src/ell/stores/sql.py index c96397e1..32d6afd3 100644 --- a/src/ell/stores/sql.py +++ b/src/ell/stores/sql.py @@ -21,7 +21,7 @@ class SQLStore(ell.store.Store): def __init__(self, db_uri: str, blob_store: Optional[ell.store.BlobStore] = None): self.engine = create_engine(db_uri, json_serializer=lambda obj: json.dumps(pydantic_ltype_aware_cattr.unstructure(obj), - sort_keys=True, default=repr)) + sort_keys=True, default=repr, ensure_ascii=False)) SQLModel.metadata.create_all(self.engine) self.open_files: Dict[str, Dict[str, Any]] = {} diff --git a/src/ell/types/studio.py b/src/ell/types/studio.py index 4ca0468f..da246402 100644 --- a/src/ell/types/studio.py +++ b/src/ell/types/studio.py @@ -139,7 +139,9 @@ def should_externalize(self) -> bool: ] total_size = sum( - len(json.dumps(field, default=(lambda x: json.dumps(x.model_dump(), default=str) if isinstance(x, BaseModel) else str(x))).encode('utf-8')) for field in json_fields if field is not None + len(json.dumps(field, default=(lambda x: json.dumps(x.model_dump(), default=str, ensure_ascii=False) + if isinstance(x, BaseModel) else str(x)), ensure_ascii=False).encode('utf-8')) + for field in json_fields if field is not None ) # print("total_size", total_size) diff --git a/src/ell/util/serialization.py b/src/ell/util/serialization.py index fbb53a11..855004ca 100644 --- a/src/ell/util/serialization.py +++ b/src/ell/util/serialization.py @@ -91,8 +91,8 @@ def handle_complex_types(obj): def compute_state_cache_key(ipstr, fn_closure): - _global_free_vars_str = f"{json.dumps(get_immutable_vars(fn_closure[2]), sort_keys=True, default=repr)}" - _free_vars_str = f"{json.dumps(get_immutable_vars(fn_closure[3]), sort_keys=True, default=repr)}" + _global_free_vars_str = f"{json.dumps(get_immutable_vars(fn_closure[2]), sort_keys=True, default=repr, ensure_ascii=False)}" + _free_vars_str = f"{json.dumps(get_immutable_vars(fn_closure[3]), sort_keys=True, default=repr, ensure_ascii=False)}" state_cache_key = hashlib.sha256(f"{ipstr}{_global_free_vars_str}{_free_vars_str}".encode('utf-8')).hexdigest() return state_cache_key @@ -103,7 +103,7 @@ def prepare_invocation_params(params): cleaned_invocation_params = pydantic_ltype_aware_cattr.unstructure(invocation_params) # Thisis because we wneed the caching to work on the hash of a cleaned and serialized object. - jstr = json.dumps(cleaned_invocation_params, sort_keys=True, default=repr) + jstr = json.dumps(cleaned_invocation_params, sort_keys=True, default=repr, ensure_ascii=False) consumes = set() import re diff --git a/x/openai_realtime/src/openai_realtime/api.py b/x/openai_realtime/src/openai_realtime/api.py index 232bfab6..d9475fd8 100644 --- a/x/openai_realtime/src/openai_realtime/api.py +++ b/x/openai_realtime/src/openai_realtime/api.py @@ -77,5 +77,5 @@ def send(self, event_name, data=None): self.dispatch("client.*", event) self.log("sent:", event_name, event) - asyncio.create_task(self.ws.send(json.dumps(event))) + asyncio.create_task(self.ws.send(json.dumps(event, ensure_ascii=False))) return True \ No newline at end of file diff --git a/x/openai_realtime/src/openai_realtime/client.py b/x/openai_realtime/src/openai_realtime/client.py index 3bbfc7b3..0fafb5c1 100644 --- a/x/openai_realtime/src/openai_realtime/client.py +++ b/x/openai_realtime/src/openai_realtime/client.py @@ -243,7 +243,7 @@ async def _call_tool(self, tool): 'item': { 'type': 'function_call_output', 'call_id': tool['call_id'], - 'output': json.dumps(result) + 'output': json.dumps(result, ensure_ascii=False) } }) except Exception as e: @@ -251,7 +251,7 @@ async def _call_tool(self, tool): 'item': { 'type': 'function_call_output', 'call_id': tool['call_id'], - 'output': json.dumps({'error': str(e)}) + 'output': json.dumps({'error': str(e)}, ensure_ascii=False) } }) self.create_response() \ No newline at end of file