Skip to content

Commit

Permalink
a few updates of website
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesZhutheThird committed Feb 5, 2024
1 parent 6976cd6 commit e31da14
Show file tree
Hide file tree
Showing 24 changed files with 98 additions and 26 deletions.
6 changes: 6 additions & 0 deletions eval/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@
"executor": "llama2",
"evaluator": "Llama2Evaluator",
},
"dfm": {
"model_type": "local",
"support_input": [0, 1],
"executor": "dfm",
"evaluator": "DFMEvaluator",
},
}


Expand Down
53 changes: 44 additions & 9 deletions eval/eval.sh
Original file line number Diff line number Diff line change
@@ -1,23 +1,58 @@
#!/bin/bash
# Evaluation runs for the dfm-2.0 checkpoints (model key "dfm") on the
# v1.2.0_20231217 data files. Each command below is one independent
# invocation of eval.py; they run one after another.
#
# NOTE(review): the meaning of --input_type 0/1 and of "--cuda_device auto"
# is defined in eval.py, not here — confirm there before changing them.

# ---------------------------------------------------------------------------
# dfm-2.0-70b: five GPUs are made visible for these runs.
# ---------------------------------------------------------------------------
export CUDA_VISIBLE_DEVICES=3,4,5,6,7

# input_type 0, with the knowledge file.
python eval.py \
    --problem_file ../data/problem_v1.2.0_20231217.json \
    --knowledge_file ../data/knowledge_v1.2.0_20231217.json \
    --questions_type 0,1,2,3 \
    --input_type 0 \
    --model dfm \
    --model_dir ../models/dfm-2.0-70b \
    --cuda_device auto

# input_type 1, with the knowledge file and the captions CSV.
python eval.py \
    --problem_file ../data/problem_v1.2.0_20231217.json \
    --knowledge_file ../data/knowledge_v1.2.0_20231217.json \
    --caption_file ../data/captions_v1.2.0_20231217.csv \
    --questions_type 0,1,2,3 \
    --input_type 1 \
    --model dfm \
    --model_dir ../models/dfm-2.0-70b \
    --cuda_device auto

# input_type 1, with the OCR CSV passed as the caption file (no knowledge file).
python eval.py \
    --problem_file ../data/problem_v1.2.0_20231217.json \
    --caption_file ../data/ocr_v1.2.0_20231217.csv \
    --questions_type 0,1,2,3 \
    --input_type 1 \
    --model dfm \
    --model_dir ../models/dfm-2.0-70b \
    --cuda_device auto

# ---------------------------------------------------------------------------
# dfm-2.0-13b: one visible GPU per run.
# ---------------------------------------------------------------------------

# input_type 0, problem file only.
export CUDA_VISIBLE_DEVICES=0
python eval.py \
    --problem_file ../data/problem_v1.2.0_20231217.json \
    --questions_type 0,1,2,3 \
    --input_type 0 \
    --model dfm \
    --model_dir ../models/dfm-2.0-13b \
    --cuda_device auto

# input_type 1, with the captions CSV.
export CUDA_VISIBLE_DEVICES=1
python eval.py \
    --problem_file ../data/problem_v1.2.0_20231217.json \
    --caption_file ../data/captions_v1.2.0_20231217.csv \
    --questions_type 0,1,2,3 \
    --input_type 1 \
    --model dfm \
    --model_dir ../models/dfm-2.0-13b \
    --cuda_device auto

# input_type 1, with the OCR CSV passed as the caption file.
export CUDA_VISIBLE_DEVICES=2
python eval.py \
    --problem_file ../data/problem_v1.2.0_20231217.json \
    --caption_file ../data/ocr_v1.2.0_20231217.csv \
    --questions_type 0,1,2,3 \
    --input_type 1 \
    --model dfm \
    --model_dir ../models/dfm-2.0-13b \
    --cuda_device auto
2 changes: 1 addition & 1 deletion eval/extract_response_on_hard.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<<<<<<< Updated upstream
#folder_list="gpt-4-vision-preview_input_2_shot_0_20231221_101231/add_no_image qwen-vl_input_2_shot_0_it_20231225_083611 gemini-pro-vision_input_2_shot_0_20231224_004723/add_no_image moss_input_0_shot_0_20231226_070654 gemini-pro_input_0_shot_0_20231223_141324 gpt-3.5-turbo-0613_input_0_shot_0_20231220_143731 viscpm_input_2_shot_0_it_bi_20231228_194002 gpt-4-1106-preview_input_0_shot_0_20231220_214000 visualglm_input_2_shot_0_it_20231226_070315"

folder_list="gpt-3.5-turbo-1106_input_0_shot_0_20240124_162631"
folder_list="dfm_input_0_shot_0_20240130_161509"

for folder in $folder_list; do
python extract_response_on_hard.py -j ../data/selected_hard_list_v1.2.0_20231217.json -i $folder
Expand Down
40 changes: 40 additions & 0 deletions eval/models/dfm_hf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""dfm-2.0 evaluator with HuggingFace Transformers"""

from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import pdb

class DFMEvaluator:
    """Run a local dfm-2.0 checkpoint through HuggingFace Transformers.

    The evaluator loads the model and tokenizer from ``model_dir``, wraps each
    question in the ``<|system|>``/``<|user|>``/``<|assistant|>`` prompt
    template, and decodes the generated continuation greedily
    (``do_sample=False``).
    """

    # Prompts longer than this many tokens are cut off (keeping the leading
    # tokens) before they are handed to ``generate``.
    MAX_INPUT_TOKENS = 4096

    def __init__(self, model_dir="dfm-2.0-13b", max_tokens=200, device_map="cuda:0"):
        """Load the model and tokenizer.

        Args:
            model_dir: Path or identifier passed to ``from_pretrained`` for
                both the model and the tokenizer.
            max_tokens: Value used for ``max_new_tokens`` during generation.
            device_map: Forwarded unchanged to
                ``AutoModelForCausalLM.from_pretrained``.
        """
        self.model_dir = model_dir
        self.sample_params = {
            "max_new_tokens": max_tokens,
            "do_sample": False,
        }
        self.device_map = device_map

        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_dir,
            device_map=device_map,
            torch_dtype=torch.float16,
            trust_remote_code=True,
        ).half().eval()
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir, trust_remote_code=True)

        # Mirror the sampling parameters onto the model's generation config;
        # they are also passed explicitly on every generate() call.
        self.model.generation_config.__dict__.update(self.sample_params)

    def prepare_inputs(self, content_sys, content):
        """Return the full prompt string for one system/user message pair.

        Both pieces are stripped of surrounding whitespace before being
        inserted into the template.
        """
        content = f"<|system|>:{content_sys.strip()}\n<|user|>:{content.strip()}<|assistant|>:"
        return content

    def generate_response(self, question):
        """Generate the model's reply for ``question``.

        Args:
            question: Mapping with ``"prompted_system_content"`` and
                ``"prompted_content"`` entries.

        Returns:
            ``(response, message)``: the decoded, stripped continuation and the
            prompt string that was built for the model.
        """
        message = self.prepare_inputs(question["prompted_system_content"], question["prompted_content"])
        inputs = self.tokenizer([message], add_special_tokens=False, return_tensors="pt")

        # Truncate first, then move to wherever the model actually lives
        # instead of assuming the default CUDA device.
        # NOTE(review): truncation keeps the head of the prompt, so an
        # over-long prompt loses its trailing "<|assistant|>:" marker.
        input_ids = inputs.input_ids[:, : self.MAX_INPUT_TOKENS].to(self.model.device)

        pred = self.model.generate(
            input_ids=input_ids,
            eos_token_id=self.tokenizer.eos_token_id,
            pad_token_id=self.tokenizer.eos_token_id,
            **self.sample_params,
        )

        # The continuation starts after the tokens that were really fed to the
        # model; using the untruncated length would drop generated tokens
        # whenever the prompt was truncated.
        input_length = input_ids.size(1)
        response = self.tokenizer.decode(pred[0][input_length:], skip_special_tokens=True).strip()
        return response, message

    def generate_answer(self, question):
        """Answer ``question`` and record the result on the same dict.

        Adds ``"input_message"`` (the prompt) and ``"prediction"`` (the model
        reply), removes the two ``prompted_*`` entries, and returns the
        mutated ``question``.
        """
        response, message = self.generate_response(question)
        question["input_message"] = message
        question["prediction"] = response
        question.pop("prompted_content")
        question.pop("prompted_system_content")
        return question
10 changes: 5 additions & 5 deletions eval/models/llama2_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pdb

class Llama2Evaluator:
def __init__(self, model_dir="/home/ubuntu/tools/llama2/llama-2-13b-chat-hf", max_tokens=200, device_map="auto"):
def __init__(self, model_dir="Llama-2-13b-chat-hf", max_tokens=200, device_map="auto"):
self.model_dir = model_dir
self.sample_params = {
"max_new_tokens": max_tokens,
Expand All @@ -15,18 +15,18 @@ def __init__(self, model_dir="/home/ubuntu/tools/llama2/llama-2-13b-chat-hf", ma
self.device_map = device_map

self.model = AutoModelForCausalLM.from_pretrained(self.model_dir, device_map=device_map, torch_dtype=torch.float16, trust_remote_code=True).half().eval()
self.tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", trust_remote_code=True)
self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir, trust_remote_code=True)

self.model.generation_config.__dict__.update(self.sample_params)

def prepare_inputs(self, content_sys, content):
    """Return the Llama-2 chat prompt for one system/user message pair.

    The system text goes between ``<<SYS>>`` and ``<</SYS>>`` and the user
    text follows it, all inside a single ``[INST] ... [/INST]`` turn.
    """
    # Single braces are required here: "{{content}}" in an f-string renders
    # the literal text "{content}" and drops the user message entirely.
    content = f"<s>[INST] <<SYS>> {content_sys} <</SYS>> \n\n {content} [/INST]"
    return content

def generate_response(self, question):
    """Generate the model's reply for ``question``.

    Args:
        question: Mapping with ``"prompted_system_content"`` and
            ``"prompted_content"`` entries.

    Returns:
        ``(response, message)``: the decoded, stripped continuation and the
        prompt string that was built for the model.
    """
    message = self.prepare_inputs(question["prompted_system_content"], question["prompted_content"])
    inputs = self.tokenizer([message], add_special_tokens=False, return_tensors="pt")

    # Keep at most 4096 prompt tokens and move them to the device the model
    # is on. ``self.device_map`` cannot be used as a tensor device because
    # its default value "auto" is not a device name.
    input_ids = inputs.input_ids[:, :4096].to(self.model.device)

    pred = self.model.generate(
        input_ids=input_ids,
        eos_token_id=self.tokenizer.eos_token_id,
        pad_token_id=self.tokenizer.eos_token_id,
        **self.sample_params,
    )

    # Slice off exactly the tokens that were fed in; the untruncated prompt
    # length would cut into the generated tokens for over-long prompts.
    input_length = input_ids.size(1)
    response = self.tokenizer.decode(pred[0][input_length:], skip_special_tokens=True).strip()
    return response, message
Expand Down
2 changes: 1 addition & 1 deletion eval/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def get_prompt(question, args):
prompted += fs_shot_guide_example[args.lang][question_type]
prompted += fs_end_example[args.lang]

if args.model in ['gpt','gpt4v','llama2','gemini','geminivision']:
if args.model in ['gpt','gpt4v','llama2','gemini','geminivision','dfm']:
prompted_question["prompted_system_content"] = prompted
prompted = "" # TODO: Identify GPT in this way seems not so reasonable.

Expand Down
2 changes: 1 addition & 1 deletion website/README_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ MULTI: Multimodal Understanding Leaderboard with Text and Images

## Authors

Zichen Zhu, Yang Xu, Lu Chen, Jingkai Yang, Yichuan Ma, Yimin Sun, Hailin Wen, Jiaqi Liu, Jinyu Cai, Yingzi Ma, Liangtai Sun, Zihan Zhao, Kai Yu
Zichen Zhu, Yang Xu, Lu Chen, Jingkai Yang, Yichuan Ma, Yiming Sun, Hailin Wen, Jiaqi Liu, Jinyu Cai, Yingzi Ma, Liangtai Sun, Zihan Zhao, Kai Yu

X-LANCE Lab, Department of Computer Science and Engineering

Expand Down
Binary file removed website/static/images/case_study.png
Binary file not shown.
Binary file removed website/static/images/case_study_wide.png
Binary file not shown.
Binary file modified website/static/images/data_anno.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified website/static/images/data_aug.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added website/static/images/example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added website/static/images/examples.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified website/static/images/overview.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified website/static/images/platform.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed website/static/images/prompt.png
Binary file not shown.
Binary file added website/static/images/prompts_all.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added website/static/pdfs/MULTI_Benchmark_v1.0.pdf
Binary file not shown.
9 changes: 0 additions & 9 deletions website/static/pdfs/license_agreement.txt

This file was deleted.

Binary file removed website/static/pdfs/visit_bench_paper.pdf
Binary file not shown.
Binary file removed website/static/videos/banner_video.mp4
Binary file not shown.
Binary file removed website/static/videos/carousel1.mp4
Binary file not shown.
Binary file removed website/static/videos/carousel2.mp4
Binary file not shown.
Binary file removed website/static/videos/carousel3.mp4
Binary file not shown.

0 comments on commit e31da14

Please sign in to comment.