diff --git a/examples/openai_embedding_client.py b/examples/openai_embedding_client.py
index b73360fe15a24..b4f4c7ad6beb2 100644
--- a/examples/openai_embedding_client.py
+++ b/examples/openai_embedding_client.py
@@ -13,11 +13,14 @@
 models = client.models.list()
 model = models.data[0].id
 
-responses = client.embeddings.create(input=[
-    "Hello my name is",
-    "The best thing about vLLM is that it supports many different models"
-],
-                                     model=model)
+responses = client.embeddings.create(
+    input=[
+        "Hello my name is",
+        "The best thing about vLLM is that it supports many different models"
+    ],
+    model=model,
+    encoding_format="float",
+)
 
 for data in responses.data:
     print(data.embedding)  # list of float of len 4096
diff --git a/tests/entrypoints/openai/test_embedding.py b/tests/entrypoints/openai/test_embedding.py
index 2ca0c0d63c25c..c9747339bbf15 100644
--- a/tests/entrypoints/openai/test_embedding.py
+++ b/tests/entrypoints/openai/test_embedding.py
@@ -18,7 +18,6 @@ def embedding_server():
         "--enforce-eager",
         "--max-model-len",
         "8192",
-        "--enforce-eager",
     ]
 
     with RemoteOpenAIServer(EMBEDDING_MODEL_NAME, args) as remote_server:
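Context for the first hunk: pinning `encoding_format="float"` keeps `data.embedding` a plain list of floats; the OpenAI Python client may otherwise request base64-encoded embeddings under the hood. As a minimal sketch (not part of this diff), the snippet below shows the complementary path of explicitly requesting `encoding_format="base64"` and decoding the payload client-side. It assumes a vLLM server at `http://localhost:8000/v1` (a placeholder) that follows the OpenAI API convention of base64-encoding float32 data.

```python
# Minimal sketch, not part of the diff above. Assumes a local vLLM server
# that returns embeddings as base64-encoded float32 data when asked.
import base64

import numpy as np
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
model = client.models.list().data[0].id

response = client.embeddings.create(
    input=["Hello my name is"],
    model=model,
    encoding_format="base64",
)

for data in response.data:
    # With an explicit encoding_format="base64", data.embedding is a
    # base64 string rather than a list of floats; decode it manually.
    vector = np.frombuffer(base64.b64decode(data.embedding), dtype=np.float32)
    print(vector.shape)
```

Requesting `encoding_format="float"` in the example sidesteps this decoding step entirely, which is why the hunk adds it explicitly. The second hunk simply drops a duplicated `--enforce-eager` flag from the test server arguments.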