Skip to content

Commit

Permalink
Resolve
Browse files Browse the repository at this point in the history
  • Loading branch information
pseudotensor committed Nov 20, 2024
2 parents d7b0fe7 + 2e830cc commit 00a32b4
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 13 deletions.
28 changes: 18 additions & 10 deletions openai_server/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,9 @@ async def get_response(chunk_response=True, **kwargs):
import ast

stream_output = kwargs.get('stream_output', True)
stream_output_orig = stream_output
# always force streaming internally to avoid blocking the server; stream_output_orig keeps the caller's requested setting
stream_output = True
verbose = kwargs.get('verbose', False)

kwargs = convert_gen_kwargs(kwargs)
Expand All @@ -285,11 +288,12 @@ async def get_response(chunk_response=True, **kwargs):
for num in range(job_outputs_num_new):
chunk, response, res_dict = get_chunk(outputs_list, job_outputs_num, last_response, num,
verbose=verbose)
if chunk_response:
if chunk:
yield chunk
else:
yield response
if stream_output_orig:
if chunk_response:
if chunk:
yield chunk
else:
yield response
last_response = response
await asyncio.sleep(0.005)
await asyncio.sleep(0.005)
Expand All @@ -299,14 +303,18 @@ async def get_response(chunk_response=True, **kwargs):
job_outputs_num_new = len(outputs_list[job_outputs_num:])
for num in range(job_outputs_num_new):
chunk, response, res_dict = get_chunk(outputs_list, job_outputs_num, last_response, num, verbose=verbose)
if chunk_response:
if chunk:
yield chunk
else:
yield response
if stream_output_orig:
if chunk_response:
if chunk:
yield chunk
else:
yield response
last_response = response
await asyncio.sleep(0.005)
job_outputs_num += job_outputs_num_new
if not stream_output_orig:
# caller asked for non-streaming output: yield res_dict['response'] instead of the per-chunk yields above
yield res_dict['response']
if verbose:
logger.info("total job_outputs_num=%d" % job_outputs_num)
else:
Expand Down
3 changes: 2 additions & 1 deletion reqs_optional/reqs_constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ transformers>=4.45.1
tenacity==8.3.0
pydantic==2.7.0
# Rust failure with orjson 3.10.7, so pin to 3.10.6
orjson==3.10.6
orjson==3.10.6
huggingface-hub==0.25.2
2 changes: 1 addition & 1 deletion src/prompter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1935,7 +1935,7 @@ def get_vllm_extra_dict(tokenizer, stop_sequences=[], repetition_penalty=None,
if repetition_penalty is not None:
vllm_extra_dict['extra_body'].update(repetition_penalty=repetition_penalty)

if response_format and response_format != 'text':
if response_format and response_format != 'text' and guided_json:
vllm_extra_dict['extra_body'].update(dict(response_format={'type': response_format}))
if guided_json:
vllm_extra_dict['extra_body'].update(guided_json=guided_json)
Expand Down
2 changes: 1 addition & 1 deletion src/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "00186dc65d5656c6ae30fc6bb71130a0879e7c64"
__version__ = "d7b0fe7efd0f587deef882e3dab1e9bd3f72de2a"

0 comments on commit 00a32b4

Please sign in to comment.