From d614094c473145cab55759eb27199358641e8f75 Mon Sep 17 00:00:00 2001 From: Michael Feil <63565275+michaelfeil@users.noreply.github.com> Date: Tue, 10 Dec 2024 00:22:38 -0800 Subject: [PATCH] new-release? (#494) * new-release? * rm conint * fix: optimum classifier * lint --- docs/assets/openapi.json | 2 +- .../models/open_ai_embedding_input_audio.py | 9 +++++++ .../models/open_ai_embedding_input_image.py | 9 +++++++ .../models/open_ai_embedding_input_text.py | 9 +++++++ .../infinity_client/pyproject.toml | 2 +- .../infinity_emb/fastapi_schemas/pymodels.py | 2 +- .../infinity_emb/inference/batch_handler.py | 25 ++++++++++++------- .../infinity_emb/infinity_server.py | 18 ++++++------- libs/infinity_emb/infinity_emb/primitives.py | 4 +++ .../infinity_emb/transformer/audio/utils.py | 10 +++----- .../transformer/classifier/optimum.py | 7 +++--- .../infinity_emb/transformer/vision/utils.py | 8 ++---- libs/infinity_emb/pyproject.toml | 2 +- .../classifier/test_optimum_classifier.py | 10 +++----- 14 files changed, 69 insertions(+), 48 deletions(-) diff --git a/docs/assets/openapi.json b/docs/assets/openapi.json index 4689745a..fe6a9faf 100644 --- a/docs/assets/openapi.json +++ b/docs/assets/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.70"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports with multimodal inputs. Aligned with OpenAI Embeddings API.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"modality\": \"image\"\n)\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format=\"float\",\n extra_body={\n \"modality\": \"audio\"\n }\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format=\"base64\", # base64: optional high performance setting\n extra_body={\n \"modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents. Aligned with Cohere API (https://docs.cohere.com/reference/rerank)\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["audio"],"const":"audio","title":"Modality","default":"audio"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Audio"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["image"],"const":"image","title":"Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["list"],"const":"list","title":"Object","default":"list"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"},"top_n":{"anyOf":[{"type":"integer","exclusiveMinimum":0.0},{"type":"null"}],"title":"Top N"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"},{"items":{"items":{"type":"number"},"type":"array"},"type":"array"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"modality":{"type":"string","enum":["text"],"const":"text","title":"Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.73"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports with multimodal inputs. Aligned with OpenAI Embeddings API.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"modality\": \"image\"\n)\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format=\"float\",\n extra_body={\n \"modality\": \"audio\"\n }\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format=\"base64\", # base64: optional high performance setting\n extra_body={\n \"modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents. Aligned with Cohere API (https://docs.cohere.com/reference/rerank)\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["audio"],"const":"audio","title":"Modality","default":"audio"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Audio"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["image"],"const":"image","title":"Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["list"],"const":"list","title":"Object","default":"list"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"},"top_n":{"anyOf":[{"type":"integer","exclusiveMinimum":0.0},{"type":"null"}],"title":"Top N"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"},{"items":{"items":{"type":"number"},"type":"array"},"type":"array"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"modality":{"type":"string","enum":["text"],"const":"text","title":"Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py index 0369c880..f35557c4 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py @@ -18,6 +18,7 @@ class OpenAIEmbeddingInputAudio: model (Union[Unset, str]): Default: 'default/not-specified'. encoding_format (Union[Unset, EmbeddingEncodingFormat]): user (Union[None, Unset, str]): + dimensions (Union[Unset, int]): Default: 0. modality (Union[Unset, OpenAIEmbeddingInputAudioModality]): Default: OpenAIEmbeddingInputAudioModality.AUDIO. """ @@ -25,6 +26,7 @@ class OpenAIEmbeddingInputAudio: model: Union[Unset, str] = "default/not-specified" encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET user: Union[None, Unset, str] = UNSET + dimensions: Union[Unset, int] = 0 modality: Union[Unset, OpenAIEmbeddingInputAudioModality] = OpenAIEmbeddingInputAudioModality.AUDIO additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) @@ -52,6 +54,8 @@ def to_dict(self) -> Dict[str, Any]: else: user = self.user + dimensions = self.dimensions + modality: Union[Unset, str] = UNSET if not isinstance(self.modality, Unset): modality = self.modality.value @@ -69,6 +73,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["encoding_format"] = encoding_format if user is not UNSET: field_dict["user"] = user + if dimensions is not UNSET: + field_dict["dimensions"] = dimensions if modality is not UNSET: field_dict["modality"] = modality @@ -118,6 +124,8 @@ def _parse_user(data: object) -> Union[None, Unset, str]: user = _parse_user(d.pop("user", UNSET)) + dimensions = d.pop("dimensions", UNSET) + _modality = d.pop("modality", UNSET) modality: Union[Unset, OpenAIEmbeddingInputAudioModality] if isinstance(_modality, Unset): @@ -130,6 +138,7 @@ def _parse_user(data: object) -> Union[None, Unset, str]: model=model, encoding_format=encoding_format, user=user, + dimensions=dimensions, modality=modality, ) diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py index b036ad0e..7ceb6d96 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py @@ -18,6 +18,7 @@ class OpenAIEmbeddingInputImage: model (Union[Unset, str]): Default: 'default/not-specified'. encoding_format (Union[Unset, EmbeddingEncodingFormat]): user (Union[None, Unset, str]): + dimensions (Union[Unset, int]): Default: 0. modality (Union[Unset, OpenAIEmbeddingInputImageModality]): Default: OpenAIEmbeddingInputImageModality.IMAGE. """ @@ -25,6 +26,7 @@ class OpenAIEmbeddingInputImage: model: Union[Unset, str] = "default/not-specified" encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET user: Union[None, Unset, str] = UNSET + dimensions: Union[Unset, int] = 0 modality: Union[Unset, OpenAIEmbeddingInputImageModality] = OpenAIEmbeddingInputImageModality.IMAGE additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) @@ -52,6 +54,8 @@ def to_dict(self) -> Dict[str, Any]: else: user = self.user + dimensions = self.dimensions + modality: Union[Unset, str] = UNSET if not isinstance(self.modality, Unset): modality = self.modality.value @@ -69,6 +73,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["encoding_format"] = encoding_format if user is not UNSET: field_dict["user"] = user + if dimensions is not UNSET: + field_dict["dimensions"] = dimensions if modality is not UNSET: field_dict["modality"] = modality @@ -118,6 +124,8 @@ def _parse_user(data: object) -> Union[None, Unset, str]: user = _parse_user(d.pop("user", UNSET)) + dimensions = d.pop("dimensions", UNSET) + _modality = d.pop("modality", UNSET) modality: Union[Unset, OpenAIEmbeddingInputImageModality] if isinstance(_modality, Unset): @@ -130,6 +138,7 @@ def _parse_user(data: object) -> Union[None, Unset, str]: model=model, encoding_format=encoding_format, user=user, + dimensions=dimensions, modality=modality, ) diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py index c68d1456..296de221 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py @@ -19,6 +19,7 @@ class OpenAIEmbeddingInputText: model (Union[Unset, str]): Default: 'default/not-specified'. encoding_format (Union[Unset, EmbeddingEncodingFormat]): user (Union[None, Unset, str]): + dimensions (Union[Unset, int]): Default: 0. modality (Union[Unset, OpenAIEmbeddingInputTextModality]): Default: OpenAIEmbeddingInputTextModality.TEXT. """ @@ -26,6 +27,7 @@ class OpenAIEmbeddingInputText: model: Union[Unset, str] = "default/not-specified" encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET user: Union[None, Unset, str] = UNSET + dimensions: Union[Unset, int] = 0 modality: Union[Unset, OpenAIEmbeddingInputTextModality] = OpenAIEmbeddingInputTextModality.TEXT additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) @@ -49,6 +51,8 @@ def to_dict(self) -> Dict[str, Any]: else: user = self.user + dimensions = self.dimensions + modality: Union[Unset, str] = UNSET if not isinstance(self.modality, Unset): modality = self.modality.value @@ -66,6 +70,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["encoding_format"] = encoding_format if user is not UNSET: field_dict["user"] = user + if dimensions is not UNSET: + field_dict["dimensions"] = dimensions if modality is not UNSET: field_dict["modality"] = modality @@ -106,6 +112,8 @@ def _parse_user(data: object) -> Union[None, Unset, str]: user = _parse_user(d.pop("user", UNSET)) + dimensions = d.pop("dimensions", UNSET) + _modality = d.pop("modality", UNSET) modality: Union[Unset, OpenAIEmbeddingInputTextModality] if isinstance(_modality, Unset): @@ -118,6 +126,7 @@ def _parse_user(data: object) -> Union[None, Unset, str]: model=model, encoding_format=encoding_format, user=user, + dimensions=dimensions, modality=modality, ) diff --git a/libs/client_infinity/infinity_client/pyproject.toml b/libs/client_infinity/infinity_client/pyproject.toml index cd3f6861..b17ebe12 100644 --- a/libs/client_infinity/infinity_client/pyproject.toml +++ b/libs/client_infinity/infinity_client/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "infinity_client" -version = "0.0.72" +version = "0.0.73" description = "A client library for accessing ♾️ Infinity - Embedding Inference Server" authors = [] readme = "README.md" diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py index d7c392e9..94c3c3d6 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py @@ -54,7 +54,7 @@ class _OpenAIEmbeddingInput(BaseModel): model: str = "default/not-specified" encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float user: Optional[str] = None - dimensions: Optional[Annotated[int, Field(strict=True, gt=0, lt=8193)]] = None + dimensions: int = 0 class _OpenAIEmbeddingInput_Text(_OpenAIEmbeddingInput): diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py index 1edda315..1bcf14eb 100644 --- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py +++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py @@ -26,6 +26,7 @@ ImageClassType, ModelCapabilites, ModelNotDeployedError, + MatryoshkaDimError, OverloadStatus, PredictSingle, PrioritizedQueueItem, @@ -61,6 +62,18 @@ def submit(self, *args, **kwargs): return self._tp.submit(*args, **kwargs) +def matryososka_slice( + embeddings: list[np.ndarray], matryoshka_dim: Optional[int] +) -> list[np.ndarray]: + if matryoshka_dim: + if 1 > matryoshka_dim or matryoshka_dim > len(embeddings[0]): + raise MatryoshkaDimError( + f"matryoshka_dim={matryoshka_dim} is not in a valid range. Select between 1 and {len(embeddings[0])}." + ) + return [e[:matryoshka_dim] for e in embeddings] + return embeddings + + class BatchHandler: def __init__( self, @@ -159,9 +172,7 @@ async def embed( input_sentences = [EmbeddingSingle(sentence=s) for s in sentences] embeddings, usage = await self._schedule(input_sentences) - if matryoshka_dim: - embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] - return embeddings, usage + return matryososka_slice(embeddings, matryoshka_dim), usage async def rerank( self, @@ -267,9 +278,7 @@ async def image_embed( items = await resolve_images(images) embeddings, usage = await self._schedule(items) - if matryoshka_dim: - embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] - return embeddings, usage + return matryososka_slice(embeddings, matryoshka_dim), usage async def audio_embed( self, *, audios: list[Union[str, bytes]], matryoshka_dim: Optional[int] = None @@ -299,9 +308,7 @@ async def audio_embed( getattr(self.model_worker[0]._model, "sampling_rate", -42), ) embeddings, usage = await self._schedule(items) - if matryoshka_dim: - embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] - return embeddings, usage + return matryososka_slice(embeddings, matryoshka_dim), usage async def _schedule(self, list_queueitem: Sequence[AbstractSingle]) -> tuple[list[Any], int]: """adds list of items to the queue and awaits until these are completed.""" diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index 36f7c6a4..a835d3e3 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -29,6 +29,7 @@ InferenceEngine, Modality, ModelCapabilites, + MatryoshkaDimError, ModelNotDeployedError, PoolingMethod, ) @@ -390,14 +391,9 @@ def url_to_base64(url, modality = "image"): f"ModelNotDeployedError: model=`{data_root.model}` does not support `embed` for modality `{modality.value}`. Reason: {ex}", code=status.HTTP_400_BAD_REQUEST, ) - except (ImageCorruption, AudioCorruption) as ex: - # get urls_or_bytes if not defined - try: - urls_or_bytes = urls_or_bytes - except NameError: - urls_or_bytes = [] + except (ImageCorruption, AudioCorruption, MatryoshkaDimError) as ex: raise errors.OpenAIException( - f"{modality.value}Corruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}", + f"{ex.__class__} -> {ex}", code=status.HTTP_400_BAD_REQUEST, ) except Exception as ex: @@ -545,9 +541,9 @@ async def _embeddings_image(data: ImageEmbeddingInput): encoding_format=data.encoding_format, usage=usage, ) - except ImageCorruption as ex: + except (ImageCorruption, MatryoshkaDimError) as ex: raise errors.OpenAIException( - f"ImageCorruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}", + f"{ex.__class__} -> {ex}", code=status.HTTP_400_BAD_REQUEST, ) except ModelNotDeployedError as ex: @@ -604,9 +600,9 @@ async def _embeddings_audio(data: AudioEmbeddingInput): encoding_format=data.encoding_format, usage=usage, ) - except AudioCorruption as ex: + except (AudioCorruption, MatryoshkaDimError) as ex: raise errors.OpenAIException( - f"AudioCorruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}", + f"{ex.__class__} -> {ex}", code=status.HTTP_400_BAD_REQUEST, ) except ModelNotDeployedError as ex: diff --git a/libs/infinity_emb/infinity_emb/primitives.py b/libs/infinity_emb/infinity_emb/primitives.py index 7ff8d404..34677069 100644 --- a/libs/infinity_emb/infinity_emb/primitives.py +++ b/libs/infinity_emb/infinity_emb/primitives.py @@ -436,6 +436,10 @@ class ModelNotDeployedError(Exception): pass +class MatryoshkaDimError(Exception): + pass + + class ImageCorruption(Exception): pass diff --git a/libs/infinity_emb/infinity_emb/transformer/audio/utils.py b/libs/infinity_emb/infinity_emb/transformer/audio/utils.py index 4893df2a..b31a0a81 100644 --- a/libs/infinity_emb/infinity_emb/transformer/audio/utils.py +++ b/libs/infinity_emb/infinity_emb/transformer/audio/utils.py @@ -51,13 +51,9 @@ async def resolve_audios( CHECK_AIOHTTP.mark_required() CHECK_SOUNDFILE.mark_required() - resolved_audios: list[AudioSingle] = [] async with aiohttp.ClientSession(trust_env=True) as session: - try: - resolved_audios = await asyncio.gather( - *[resolve_audio(audio, allowed_sampling_rate, session) for audio in audio_urls] - ) - except Exception as e: - raise AudioCorruption(f"Failed to resolve audio: {e}") + resolved_audios = await asyncio.gather( + *[resolve_audio(audio, allowed_sampling_rate, session) for audio in audio_urls] + ) return resolved_audios diff --git a/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py index 51edef0f..ae995b13 100644 --- a/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py +++ b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py @@ -39,7 +39,7 @@ def __init__(self, *, engine_args: EngineArgs): prefer_quantized=("cpu" in provider.lower() or "openvino" in provider.lower()), ) - self.model = optimize_model( + model = optimize_model( model_name_or_path=engine_args.model_name_or_path, model_class=ORTModelForSequenceClassification, revision=engine_args.revision, @@ -48,7 +48,7 @@ def __init__(self, *, engine_args: EngineArgs): file_name=onnx_file.as_posix(), optimize_model=not os.environ.get("INFINITY_ONNX_DISABLE_OPTIMIZE", False), ) - self.model.use_io_binding = False + model.use_io_binding = False self.tokenizer = AutoTokenizer.from_pretrained( engine_args.model_name_or_path, @@ -60,12 +60,11 @@ def __init__(self, *, engine_args: EngineArgs): self._pipe = pipeline( task="text-classification", - model=self.model, + model=model, trust_remote_code=engine_args.trust_remote_code, top_k=None, revision=engine_args.revision, tokenizer=self.tokenizer, - device=engine_args.device, ) def encode_pre(self, sentences: list[str]): diff --git a/libs/infinity_emb/infinity_emb/transformer/vision/utils.py b/libs/infinity_emb/infinity_emb/transformer/vision/utils.py index 16b80c37..f0378fde 100644 --- a/libs/infinity_emb/infinity_emb/transformer/vision/utils.py +++ b/libs/infinity_emb/infinity_emb/transformer/vision/utils.py @@ -78,11 +78,7 @@ async def resolve_images( CHECK_PIL.mark_required() resolved_imgs = [] - - try: - async with aiohttp.ClientSession(trust_env=True) as session: - resolved_imgs = await asyncio.gather(*[resolve_image(img, session) for img in images]) - except Exception as e: - raise ImageCorruption(f"Failed to resolve image: {images}.\nError msg: {str(e)}") + async with aiohttp.ClientSession(trust_env=True) as session: + resolved_imgs = await asyncio.gather(*[resolve_image(img, session) for img in images]) return resolved_imgs diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml index 654cdf13..1bd621d1 100644 --- a/libs/infinity_emb/pyproject.toml +++ b/libs/infinity_emb/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "infinity_emb" -version = "0.0.72" +version = "0.0.73" description = "Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip." authors = ["michaelfeil "] license = "MIT" diff --git a/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py b/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py index 386c3061..95b4104d 100644 --- a/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py +++ b/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py @@ -1,7 +1,6 @@ -import torch -from optimum.pipelines import pipeline # type: ignore -from optimum.onnxruntime import ORTModelForSequenceClassification +from transformers.pipelines import pipeline # type: ignore from infinity_emb.args import EngineArgs + from infinity_emb.transformer.classifier.optimum import OptimumClassifier @@ -9,15 +8,12 @@ def test_classifier(model_name: str = "SamLowe/roberta-base-go_emotions-onnx"): model = OptimumClassifier( engine_args=EngineArgs( model_name_or_path=model_name, - device="cuda" if torch.cuda.is_available() else "cpu", ) # type: ignore ) pipe = pipeline( task="text-classification", - model=ORTModelForSequenceClassification.from_pretrained( - model_name, file_name="onnx/model_quantized.onnx" - ), + model="SamLowe/roberta-base-go_emotions", # hoping that this is the same model as model_name top_k=None, )