Skip to content

Commit

Permalink
Fix the wrong Content-Length in python-server.py for non-ascii charac…
Browse files Browse the repository at this point in the history
…ters. (microsoft#24480)

Resolves: microsoft#24479

`python_server.py` currently reads its input with `sys.stdin.read`, whose
size argument is counted in `str` characters (Unicode code points), not bytes.
ref: https://docs.python.org/3/library/sys.html

On the other hand, "Content-Length" is the size in **bytes**, so we
should not pass `content_length` to `sys.stdin.read`. For example,
`print("こんにちは世界")` is 16 characters long as a `str`, but 30 bytes once encoded as UTF-8.

```
>>> len('print("こんにちは世界")')
16
>>> len('print("こんにちは世界")'.encode())
30
```

This PR makes two changes.
1. Replace `sys.stdin.read(content_length)` with
`sys.stdin.buffer.read(content_length).decode()`.
2. Make `_send_message` calculate "Content-Length" from bytes, not str.

With these changes, the original issue
microsoft#24479 is resolved.


![image](https://github.com/user-attachments/assets/20e72a26-d4ad-4e16-9c5b-ed41055c95d9)
  • Loading branch information
tomoki authored Nov 26, 2024
1 parent 42b63b9 commit dba0a4c
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions python_files/python_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@


def _send_message(msg: str):
length_msg = len(msg)
# Content-Length is the data size in bytes.
length_msg = len(msg.encode())
STDOUT.buffer.write(f"Content-Length: {length_msg}\r\n\r\n{msg}".encode())
STDOUT.buffer.flush()

Expand Down Expand Up @@ -55,10 +56,11 @@ def custom_input(prompt=""):
try:
send_request({"prompt": prompt})
headers = get_headers()
# Content-Length is the data size in bytes.
content_length = int(headers.get("Content-Length", 0))

if content_length:
message_text = STDIN.read(content_length)
message_text = STDIN.buffer.read(content_length).decode()
message_json = json.loads(message_text)
return message_json["result"]["userInput"]
except Exception:
Expand All @@ -74,10 +76,11 @@ def handle_response(request_id):
while not STDIN.closed:
try:
headers = get_headers()
# Content-Length is the data size in bytes.
content_length = int(headers.get("Content-Length", 0))

if content_length:
message_text = STDIN.read(content_length)
message_text = STDIN.buffer.read(content_length).decode()
message_json = json.loads(message_text)
our_user_input = message_json["result"]["userInput"]
if message_json["id"] == request_id:
Expand Down Expand Up @@ -160,7 +163,7 @@ def get_value(self) -> str:
def get_headers():
headers = {}
while True:
line = STDIN.readline().strip()
line = STDIN.buffer.readline().decode().strip()
if not line:
break
name, value = line.split(":", 1)
Expand All @@ -172,10 +175,11 @@ def get_headers():
while not STDIN.closed:
try:
headers = get_headers()
# Content-Length is the data size in bytes.
content_length = int(headers.get("Content-Length", 0))

if content_length:
request_text = STDIN.read(content_length)
request_text = STDIN.buffer.read(content_length).decode()
request_json = json.loads(request_text)
if request_json["method"] == "execute":
execute(request_json, USER_GLOBALS)
Expand Down

0 comments on commit dba0a4c

Please sign in to comment.