Skip to content

Commit

Permalink
update qwenvl
Browse files Browse the repository at this point in the history
  • Loading branch information
Zimix0 committed Dec 29, 2023
1 parent eb4988e commit d067cc1
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 5 deletions.
14 changes: 11 additions & 3 deletions real_gemini/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@
from pathlib import Path
from utils_st.audio2text import audio2text_from_bytes
from utils_st.extracte_img import get_main_img
from utils_st.get_gpt4v_response import gpt4v,gpt4v_client
from utils_st.get_gpt4v_response import gpt4v_client
from utils_st.get_qwen_response import QwenVL_client
from utils_st.text2audio import text2audio,autoplay_audio
from utils_st.record_video import record
from queue import Queue
import time
import cv2
from threading import Thread,Event

img = {'assistant':'./source/bot.png','user':None}
res_ = {'Qwen-vl':QwenVL_client,'gpt4v':gpt4v_client}

# 设置事件锁
event_record = Event()
event_chat = Event()
Expand All @@ -19,7 +23,9 @@
with st.sidebar:
with st.form('参数配置'):
max_chat_turn = st.slider('最大对话轮数:',min_value=1,max_value=10000,value=10)
response_name = st.selectbox('选择模型',['Qwen-vl','gpt4v'],index=1)
st.form_submit_button('提交配置')
responser = res_[response_name]
max_record_round = 2*max_chat_turn
q = Queue(max_record_round)

Expand All @@ -34,7 +40,7 @@
# in_file_audio = RECORD_DIR / f"{prefix}_input_audio.mp3"
#########################存储录入的文件#####################
# 对话机器人的图标
img={'assistant':'./source/bot.png','user':None}


if "messages" not in st.session_state:
st.session_state.messages = []
Expand All @@ -47,6 +53,7 @@ def my_recorder():
print(f'record {i}')
imgs,audio = record()
input_text,code_status,request_id = audio2text_from_bytes(audio.get_wav_data())
# 过滤一些无意义的文本
if input_text and len(input_text)>5:
q.put((imgs,audio,input_text))
else:
Expand Down Expand Up @@ -97,7 +104,7 @@ def response(prompt=None,imgs=None,autoplay=True,audio_response=True):
st.session_state.messages.append({"role": "user", "content": prompt})
# Display assistant response in chat message container
with st.chat_message("assistant",avatar='./source/bot.png'):
res = gpt4v_client(query=prompt,imgs=imgs)
res = responser(query=prompt,imgs=imgs)
print('res[text]:',res['text'])
if audio_response:
sound,rate,byte_sound_array = text2audio(res["text"])
Expand Down Expand Up @@ -127,6 +134,7 @@ def response(prompt=None,imgs=None,autoplay=True,audio_response=True):
video_show.camera_input('tt',label_visibility='hidden')
# 开始录入输入
if video_show.button('开始对话'):
st.info(f'开始监听麦克风...')
record_thread.start()
else:
st.stop()
Expand Down
1 change: 0 additions & 1 deletion real_gemini/utils_st/get_gpt4v_response.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import base64
import json
import requests
import numpy as np
import cv2
Expand Down
65 changes: 65 additions & 0 deletions real_gemini/utils_st/get_qwen_response.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import base64
import requests
import numpy as np
import cv2
# from real_gemini.gpt4v import GPT4V


def _image_to_bytes(img):
    """Return the raw bytes for one image (file path or in-memory array)."""
    if isinstance(img, str):
        # A string is treated as a path: send the file's bytes unchanged.
        with open(img, 'rb') as f:
            return f.read()
    # Anything else is handed to OpenCV and serialized as PNG
    # (presumably a numpy frame captured by the recorder -- confirm with caller).
    ok, encoded = cv2.imencode('.png', img)
    if not ok:
        raise ValueError('cv2.imencode failed to encode image as PNG')
    return np.array(encoded).tobytes()


def img2base64(imgs):
    """Encode one image, or a list of images, as base64 text.

    Args:
        imgs: ``None``, a single image, or a list/tuple of images. Each image
            is either a ``str`` file path or an object accepted by
            ``cv2.imencode``.

    Returns:
        A list with one base64-encoded ``str`` per input image, in input
        order. ``None`` and empty sequences give an empty list.

    Raises:
        OSError: if a file path cannot be read.
        ValueError: if OpenCV cannot encode an in-memory image.
    """
    if imgs is None:
        return []
    # Normalize the single-image case so both shapes share one code path.
    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]
    return [base64.b64encode(_image_to_bytes(img)).decode() for img in imgs]

def QwenVL_client(query, imgs=None,
                  api_url='http://192.168.80.19:6679/qwen-vl/',
                  timeout=60):
    """Send a prompt (and optional images) to the Qwen-VL HTTP service.

    Args:
        query: prompt text forwarded as the ``prompt`` form field.
        imgs: image(s) passed through ``img2base64`` and forwarded as the
            ``image_strs`` form field.
        api_url: endpoint of the Qwen-VL service; defaults to the address
            previously hard-coded in this function.
        timeout: seconds to wait for the service before giving up, so a dead
            backend cannot block the caller forever.

    Returns:
        dict: ``{'text': ...}`` where the value is the decoded JSON response
        body on success, or a fixed error message when the request fails.
    """
    error_text = '千问接口请求出错了,请确认后台服务后再尝试~'
    try:
        input_data = {
            'prompt': query,
            'image_strs': img2base64(imgs),
        }
        resp = requests.post(
            api_url,
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'accept': 'application/json',
            },
            data=input_data,
            timeout=timeout,
        )
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        resp.raise_for_status()
        prompt_text = resp.json()
    except (requests.RequestException, ValueError, OSError) as e:
        # ValueError covers a non-JSON body or an unencodable image;
        # OSError covers an unreadable image path.
        print(e)
        prompt_text = error_text
    return {'text': prompt_text}


if __name__ == '__main__':
    # Manual smoke test: ask the Qwen-VL service to describe a local image.
    sample_images = ['/Users/wuziwei/git_project/Real-Gemini/source/bot.png']
    print(QwenVL_client(query='描述一下这种图片', imgs=sample_images))
2 changes: 1 addition & 1 deletion run.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Take the key from the caller's environment; the placeholder is only a
# reminder. Never commit a real API key to the repository.
export OPENAI_API_KEY="${OPENAI_API_KEY:-your-api-key}"
streamlit run ./real_gemini/chat.py

0 comments on commit d067cc1

Please sign in to comment.