a few updates of website

OpenDFM · Feb 5, 2024 · e31da14 · e31da14
1 parent 6976cd6
commit e31da14
Show file tree

Hide file tree

Showing 24 changed files with 98 additions and 26 deletions.
diff --git a/eval/args.py b/eval/args.py
@@ -63,6 +63,12 @@
         "executor": "llama2",
         "evaluator": "Llama2Evaluator",
     },
+    "dfm": {
+        "model_type": "local",
+        "support_input": [0, 1],
+        "executor": "dfm",
+        "evaluator": "DFMEvaluator",
+    },
 }
 
 

diff --git a/eval/eval.sh b/eval/eval.sh
@@ -1,23 +1,58 @@
 #!/bin/bash
 
+export CUDA_VISIBLE_DEVICES=3,4,5,6,7
 python eval.py \
      --problem_file ../data/problem_v1.2.0_20231217.json \
+     --knowledge_file ../data/knowledge_v1.2.0_20231217.json \
      --questions_type 0,1,2,3 \
      --input_type 0 \
-     --model llama2 \
-     --model_dir ../models/dfm-2.0-13b \
-     --cuda_device cuda:5 \
-     --exp_name dfm-2.0-13b
+     --model dfm \
+     --model_dir ../models/dfm-2.0-70b \
+     --cuda_device auto
 
 python eval.py \
      --problem_file ../data/problem_v1.2.0_20231217.json \
-     --knowledge_file ../data/knowledge_v1.2.0_20231217.json \
+     --caption_file ../data/captions_v1.2.0_20231217.csv \
+     --questions_type 0,1,2,3 \
+     --input_type 1 \
+     --model dfm \
+     --model_dir ../models/dfm-2.0-70b \
+     --cuda_device auto
+
+python eval.py \
+     --problem_file ../data/problem_v1.2.0_20231217.json \
+     --caption_file ../data/ocr_v1.2.0_20231217.csv \
+     --questions_type 0,1,2,3 \
+     --input_type 1 \
+     --model dfm \
+     --model_dir ../models/dfm-2.0-70b \
+     --cuda_device auto
+
+export CUDA_VISIBLE_DEVICES=0
+python eval.py \
+     --problem_file ../data/problem_v1.2.0_20231217.json \
      --questions_type 0,1,2,3 \
      --input_type 0 \
-     --model llama2 \
+     --model dfm \
      --model_dir ../models/dfm-2.0-13b \
-     --cuda_device cuda:6 \
-     --exp_name dfm-2.0-13b
+     --cuda_device auto
 
+export CUDA_VISIBLE_DEVICES=1
+python eval.py \
+     --problem_file ../data/problem_v1.2.0_20231217.json \
+     --caption_file ../data/captions_v1.2.0_20231217.csv \
+     --questions_type 0,1,2,3 \
+     --input_type 1 \
+     --model dfm \
+     --model_dir ../models/dfm-2.0-13b \
+     --cuda_device auto
 
-python eval.py --checkpoint_dir ../results/dfm-2.0-13b_llama2_input_0_shot_0_kn_20240125_191329
+export CUDA_VISIBLE_DEVICES=2
+python eval.py \
+     --problem_file ../data/problem_v1.2.0_20231217.json \
+     --caption_file ../data/ocr_v1.2.0_20231217.csv \
+     --questions_type 0,1,2,3 \
+     --input_type 1 \
+     --model dfm \
+     --model_dir ../models/dfm-2.0-13b \
+     --cuda_device auto
diff --git a/eval/extract_response_on_hard.sh b/eval/extract_response_on_hard.sh
@@ -2,7 +2,7 @@
 <<<<<<< Updated upstream
 #folder_list="gpt-4-vision-preview_input_2_shot_0_20231221_101231/add_no_image qwen-vl_input_2_shot_0_it_20231225_083611 gemini-pro-vision_input_2_shot_0_20231224_004723/add_no_image moss_input_0_shot_0_20231226_070654 gemini-pro_input_0_shot_0_20231223_141324 gpt-3.5-turbo-0613_input_0_shot_0_20231220_143731 viscpm_input_2_shot_0_it_bi_20231228_194002 gpt-4-1106-preview_input_0_shot_0_20231220_214000 visualglm_input_2_shot_0_it_20231226_070315"
 
-folder_list="gpt-3.5-turbo-1106_input_0_shot_0_20240124_162631"
+folder_list="dfm_input_0_shot_0_20240130_161509"
 
 for folder in $folder_list; do
     python extract_response_on_hard.py -j ../data/selected_hard_list_v1.2.0_20231217.json -i $folder

diff --git a/eval/models/dfm_hf.py b/eval/models/dfm_hf.py
@@ -0,0 +1,77 @@
+"""dfm-2.0 evaluator with HuggingFace Transformers"""
+
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import transformers
+import torch
+import pdb
+
+class DFMEvaluator:
+    """Answer benchmark questions with a DFM-2.0 causal language model.
+
+    Decoding is greedy (``do_sample=False``) and limited to ``max_tokens`` new tokens.
+    """
+
+    # Prompts longer than this many tokens are cut off before generation.
+    MAX_INPUT_TOKENS = 4096
+
+    def __init__(self, model_dir="dfm-2.0-13b", max_tokens=200, device_map="cuda:0"):
+        """Load the model and tokenizer from ``model_dir``.
+
+        Args:
+            model_dir: Checkpoint location passed to ``from_pretrained``.
+            max_tokens: Maximum number of newly generated tokens per question.
+            device_map: Device placement forwarded to the model's ``from_pretrained``.
+        """
+        self.model_dir = model_dir
+        self.sample_params = {
+            "max_new_tokens": max_tokens,
+            "do_sample": False,
+        }
+        self.device_map = device_map
+
+        self.model = AutoModelForCausalLM.from_pretrained(self.model_dir, device_map=device_map, torch_dtype=torch.float16, trust_remote_code=True).half().eval()
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir, trust_remote_code=True)
+
+        # Mirror the decoding settings onto the model's default generation config.
+        self.model.generation_config.__dict__.update(self.sample_params)
+
+    def prepare_inputs(self, content_sys, content):
+        """Return the prompt string built from the system text and the user text.
+
+        Both parts are stripped of surrounding whitespace before insertion.
+        """
+        content = f"<|system|>:{content_sys.strip()}\n<|user|>:{content.strip()}<|assistant|>:"
+        return content
+
+    def generate_response(self, question):
+        """Generate the model's reply for one question.
+
+        Args:
+            question: Mapping with ``prompted_system_content`` and ``prompted_content``.
+
+        Returns:
+            A ``(response, message)`` tuple: the decoded continuation and the prompt text.
+        """
+        message = self.prepare_inputs(question["prompted_system_content"],question["prompted_content"])
+        inputs = self.tokenizer([message],add_special_tokens=False, return_tensors="pt")
+        # Truncate once and reuse the truncated tensor so that the prompt length used
+        # to slice the output below matches what the model actually received.
+        input_ids = inputs.input_ids[:, :self.MAX_INPUT_TOKENS].to(self.model.device)
+        pred = self.model.generate(input_ids=input_ids, eos_token_id=self.tokenizer.eos_token_id, pad_token_id=self.tokenizer.eos_token_id, **self.sample_params, )
+        input_length = input_ids.size(1)
+        # The output holds the prompt followed by the continuation; keep the latter.
+        response = self.tokenizer.decode(pred[0][input_length:], skip_special_tokens=True).strip()
+        return response, message
+
+    def generate_answer(self, question):
+        """Run generation and record the result on ``question`` in place.
+
+        Adds ``input_message`` and ``prediction``, removes the two prompt fields, and
+        returns the same mapping.
+        """
+        response, message = self.generate_response(question)
+        question["input_message"] = message
+        question["prediction"] = response
+        question.pop("prompted_content")
+        question.pop("prompted_system_content")
+        return question
diff --git a/eval/models/llama2_hf.py b/eval/models/llama2_hf.py
@@ -6,7 +6,7 @@
 import pdb
 
 class Llama2Evaluator:
-    def __init__(self, model_dir="/home/ubuntu/tools/llama2/llama-2-13b-chat-hf", max_tokens=200, device_map="auto"):
+    def __init__(self, model_dir="Llama-2-13b-chat-hf", max_tokens=200, device_map="auto"):
         self.model_dir = model_dir
         self.sample_params = {
             "max_new_tokens": max_tokens,
@@ -15,18 +15,28 @@ def __init__(self, model_dir="/home/ubuntu/tools/llama2/llama-2-13b-chat-hf", ma
         self.device_map = device_map
 
         self.model = AutoModelForCausalLM.from_pretrained(self.model_dir, device_map=device_map, torch_dtype=torch.float16, trust_remote_code=True).half().eval()
-        self.tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", trust_remote_code=True)
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir, trust_remote_code=True)
 
         self.model.generation_config.__dict__.update(self.sample_params)
 
-    def prepare_inputs(self, content_sys,content):
-        content = f"<s>[INST] <<SYS>> {content_sys} <</SYS>> \n\n {content} [/INST]"
+    def prepare_inputs(self, content_sys, content):
+        """Return the Llama-2 chat prompt wrapping the system text and the user text."""
+        # Single braces on purpose: doubled braces would emit a literal "{content}".
+        content = f"<s>[INST] <<SYS>> {content_sys} <</SYS>> \n\n {content} [/INST]"
         return content
 
     def generate_response(self, question):
+        """Generate the model's reply for one question.
+
+        Returns a ``(response, message)`` tuple: the decoded continuation and the prompt text.
+        """
         message = self.prepare_inputs(question["prompted_system_content"],question["prompted_content"])
         inputs = self.tokenizer([message],add_special_tokens=False, return_tensors="pt")
-        pred = self.model.generate(input_ids=inputs.input_ids[0, :2048].cuda().unsqueeze(0), eos_token_id=self.tokenizer.eos_token_id, pad_token_id=self.tokenizer.eos_token_id, **self.sample_params, )
-        input_length = inputs.input_ids.size(1)
+        # Truncate once and reuse the truncated tensor so that the prompt length used
+        # to slice the output below matches what the model actually received.
+        # The tensor follows the model's own device; ``device_map`` may be "auto", which is not a device.
+        input_ids = inputs.input_ids[:, :4096].to(self.model.device)
+        pred = self.model.generate(input_ids=input_ids, eos_token_id=self.tokenizer.eos_token_id, pad_token_id=self.tokenizer.eos_token_id, **self.sample_params, )
+        input_length = input_ids.size(1)
         response = self.tokenizer.decode(pred[0][input_length:], skip_special_tokens=True).strip()
         return response, message

diff --git a/eval/prompts.py b/eval/prompts.py
@@ -182,7 +182,7 @@ def get_prompt(question, args):
         prompted += fs_shot_guide_example[args.lang][question_type]
         prompted += fs_end_example[args.lang]
 
-    if args.model in ['gpt','gpt4v','llama2','gemini','geminivision']:
+    if args.model in ['gpt','gpt4v','llama2','gemini','geminivision','dfm']:
         prompted_question["prompted_system_content"] = prompted
         prompted = ""  # TODO: Identify GPT in this way seems not so reasonable.
 

diff --git a/website/README_en.md b/website/README_en.md
@@ -5,7 +5,7 @@ MULTI: Multimodal Understanding Leaderboard with Text and Images
 
 ## Authors
 
-Zichen Zhu, Yang Xu, Lu Chen, Jingkai Yang, Yichuan Ma, Yimin Sun, Hailin Wen, Jiaqi Liu, Jinyu Cai, Yingzi Ma, Liangtai Sun, Zihan Zhao, Kai Yu
+Zichen Zhu, Yang Xu, Lu Chen, Jingkai Yang, Yichuan Ma, Yiming Sun, Hailin Wen, Jiaqi Liu, Jinyu Cai, Yingzi Ma, Liangtai Sun, Zihan Zhao, Kai Yu
 
 X-LANCE Lab, Department of Computer Science and Engineering
 

diff --git a/website/static/images/case_study.png b/website/static/images/case_study.png
diff --git a/website/static/images/case_study_wide.png b/website/static/images/case_study_wide.png
diff --git a/website/static/images/data_anno.png b/website/static/images/data_anno.png
diff --git a/website/static/images/data_aug.png b/website/static/images/data_aug.png
diff --git a/website/static/images/example.png b/website/static/images/example.png
diff --git a/website/static/images/examples.png b/website/static/images/examples.png
diff --git a/website/static/images/overview.png b/website/static/images/overview.png
diff --git a/website/static/images/platform.png b/website/static/images/platform.png
diff --git a/website/static/images/prompt.png b/website/static/images/prompt.png
diff --git a/website/static/images/prompts_all.png b/website/static/images/prompts_all.png
diff --git a/website/static/pdfs/MULTI_Benchmark_v1.0.pdf b/website/static/pdfs/MULTI_Benchmark_v1.0.pdf
diff --git a/website/static/pdfs/license_agreement.txt b/website/static/pdfs/license_agreement.txt
diff --git a/website/static/pdfs/visit_bench_paper.pdf b/website/static/pdfs/visit_bench_paper.pdf
diff --git a/website/static/videos/banner_video.mp4 b/website/static/videos/banner_video.mp4
diff --git a/website/static/videos/carousel1.mp4 b/website/static/videos/carousel1.mp4
diff --git a/website/static/videos/carousel2.mp4 b/website/static/videos/carousel2.mp4
diff --git a/website/static/videos/carousel3.mp4 b/website/static/videos/carousel3.mp4