From e96dc673984593db5e27709591f22b8ae13e9ff7 Mon Sep 17 00:00:00 2001 From: Frank Liu Date: Fri, 1 Dec 2023 14:58:29 -0800 Subject: [PATCH] [docs] Adds option.max_output_size document (#1354) --- serving/docs/configurations_model.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/serving/docs/configurations_model.md b/serving/docs/configurations_model.md index 394a98310..a844cda86 100644 --- a/serving/docs/configurations_model.md +++ b/serving/docs/configurations_model.md @@ -58,7 +58,9 @@ option.disablePerSessionThreads=true option.ortDevice=TensorRT/ROCM/CoreML # Python model options -retry_threshold=10 # Mark model as failure after python process crashing 10 times +# Mark model as failure after python process crashing 10 times +retry_threshold=10 + option.pythonExecutable=python3 option.entryPoint=deepspeed.py option.handler=handle @@ -72,6 +74,9 @@ option.rolling_batch=auto option.max_rolling_batch_size=64 option.paged_attention=false option.max_rolling_batch_prefill_tokens=1088 + +# max output size in bytes, defaults to 60M +option.max_output_size=67108864 ``` Most of the options can also be overridden by an environment variable with the `OPTION_` prefix and all caps.