[Model]: Add support for Aria model (vllm-project#10514)
Signed-off-by: xffxff <[email protected]>
Co-authored-by: Isotr0py <[email protected]>
xffxff and Isotr0py authored Nov 25, 2024
1 parent 452a4e8 commit b1d9205
Showing 8 changed files with 791 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/source/models/supported_models.rst
@@ -476,6 +476,12 @@ Text Generation
- Example HF Models
- :ref:`LoRA <lora>`
- :ref:`PP <distributed_serving>`
* - :code:`AriaForConditionalGeneration`
- Aria
- T + I
- :code:`rhymes-ai/Aria`
-
- ✅︎
* - :code:`Blip2ForConditionalGeneration`
- BLIP-2
- T + I\ :sup:`E`
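Note: with this table entry, Aria can be used like any other supported text+image model. A minimal offline-inference sketch, not part of the diff — the engine arguments, prompt format, and stop token IDs mirror the example script diffed below, and the image path is a placeholder:

# Minimal sketch of single-image inference with Aria; assumes a GPU with
# bfloat16 support. Prompt format and stop tokens come from run_aria below.
from PIL import Image

from vllm import LLM, SamplingParams

llm = LLM(model="rhymes-ai/Aria",
          tokenizer_mode="slow",
          trust_remote_code=True,
          dtype="bfloat16")

prompt = ("<|im_start|>user\n<fim_prefix><|img|><fim_suffix>\n"
          "What is in this image?<|im_end|>\n<|im_start|>assistant\n")
image = Image.open("example.jpg")  # placeholder path

outputs = llm.generate(
    {"prompt": prompt, "multi_modal_data": {"image": image}},
    SamplingParams(max_tokens=128,
                   stop_token_ids=[93532, 93653, 944, 93421, 1019,
                                   93653, 93519]))
print(outputs[0].outputs[0].text)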
18 changes: 18 additions & 0 deletions examples/offline_inference_vision_language.py
@@ -402,6 +402,23 @@ def run_idefics3(question: str, modality: str):
return llm, prompt, stop_token_ids


# Aria
def run_aria(question: str, modality: str):
assert modality == "image"
model_name = "rhymes-ai/Aria"

llm = LLM(model=model_name,
tokenizer_mode="slow",
trust_remote_code=True,
dtype="bfloat16")

prompt = (f"<|im_start|>user\n<fim_prefix><|img|><fim_suffix>\n{question}"
"<|im_end|>\n<|im_start|>assistant\n")

stop_token_ids = [93532, 93653, 944, 93421, 1019, 93653, 93519]
return llm, prompt, stop_token_ids


model_example_map = {
"llava": run_llava,
"llava-next": run_llava_next,
@@ -423,6 +440,7 @@ def run_idefics3(question: str, modality: str):
"molmo": run_molmo,
"glm4v": run_glm4v,
"idefics3": run_idefics3,
"aria": run_aria,
}


20 changes: 20 additions & 0 deletions examples/offline_inference_vision_language_multi_image.py
@@ -321,6 +321,25 @@ def load_idefics3(question, image_urls: List[str]) -> ModelRequestData:
)


def load_aria(question, image_urls: List[str]) -> ModelRequestData:
model_name = "rhymes-ai/Aria"
llm = LLM(model=model_name,
tokenizer_mode="slow",
trust_remote_code=True,
dtype="bfloat16",
limit_mm_per_prompt={"image": len(image_urls)})
placeholders = "<fim_prefix><|img|><fim_suffix>\n" * len(image_urls)
prompt = (f"<|im_start|>user\n{placeholders}{question}<|im_end|>\n"
"<|im_start|>assistant\n")
stop_token_ids = [93532, 93653, 944, 93421, 1019, 93653, 93519]
return ModelRequestData(
llm=llm,
prompt=prompt,
stop_token_ids=stop_token_ids,
image_data=[fetch_image(url) for url in image_urls],
chat_template=None)


model_example_map = {
"phi3_v": load_phi3v,
"h2ovl_chat": load_h2onvl,
@@ -330,6 +349,7 @@ def load_idefics3(question, image_urls: List[str]) -> ModelRequestData:
"qwen_vl_chat": load_qwenvl_chat,
"mllama": load_mllama,
"idefics3": load_idefics3,
"aria": load_aria,
}


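Note: the script's shared driver consumes the returned ModelRequestData roughly as in this sketch (a paraphrase, not part of the diff; the question and image URLs are placeholders):

# Hedged sketch of how load_aria's ModelRequestData is consumed: all fetched
# images for the single prompt are passed together under
# multi_modal_data["image"].
from vllm import SamplingParams

image_urls = ["https://example.com/a.jpg",  # placeholder URLs
              "https://example.com/b.jpg"]
req = load_aria("What do these two images have in common?", image_urls)

outputs = req.llm.generate(
    {"prompt": req.prompt,
     "multi_modal_data": {"image": req.image_data}},
    SamplingParams(max_tokens=128, stop_token_ids=req.stop_token_ids))
print(outputs[0].outputs[0].text)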
2 changes: 2 additions & 0 deletions tests/models/registry.py
@@ -43,6 +43,8 @@ class _HfExamplesInfo:
trust_remote_code=True),
"ArcticForCausalLM": _HfExamplesInfo("Snowflake/snowflake-arctic-instruct",
trust_remote_code=True),
"AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria",
trust_remote_code=True),
"BaiChuanForCausalLM": _HfExamplesInfo("baichuan-inc/Baichuan-7B",
trust_remote_code=True),
"BaichuanForCausalLM": _HfExamplesInfo("baichuan-inc/Baichuan2-7B-chat",
2 changes: 2 additions & 0 deletions vllm/entrypoints/chat_utils.py
@@ -412,6 +412,8 @@ def _placeholder_str(self, modality: ModalityStr,
return ""
if model_type == "idefics3":
return "<image>"
if model_type == "aria":
return "<|fim_prefix|><|img|><|fim_suffix|>"

raise TypeError(f"Unknown {modality} model type: {model_type}")
elif modality == "audio":
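Note: this mapping is what lets chat callers pass standard OpenAI-style image content parts; vLLM expands each image to "<|fim_prefix|><|img|><|fim_suffix|>" when rendering the Aria prompt. A hedged sketch via LLM.chat, assuming the same engine arguments as the example scripts (the image URL is a placeholder):

# Sketch: with the mapping above, the placeholder is inserted automatically
# for each image_url content part when chatting with an Aria model.
from vllm import LLM

llm = LLM(model="rhymes-ai/Aria",
          tokenizer_mode="slow",
          trust_remote_code=True,
          dtype="bfloat16")

outputs = llm.chat([{
    "role": "user",
    "content": [
        {"type": "image_url",
         "image_url": {"url": "https://example.com/duck.jpg"}},  # placeholder
        {"type": "text", "text": "What is in this image?"},
    ],
}])
print(outputs[0].outputs[0].text)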
(Diffs for the remaining 3 changed files not loaded.)
