From d067cc17eb92709fa3b6d272a1e1a9a9ad2a600c Mon Sep 17 00:00:00 2001
From: wzw
Date: Fri, 29 Dec 2023 17:46:54 +0800
Subject: [PATCH] update qwenvl

---
Reviewer notes (this section is ignored by git am / git apply):
- Line structure was rebuilt from a whitespace-collapsed copy of this patch.
  Indentation and the position of blank context lines in the chat.py hunks
  are best-effort; verify them against the target tree before applying.
- run.sh no longer embeds an OpenAI secret key; it is taken from the caller's
  environment. The key that was previously committed here must be revoked.
- img2base64: the single-image branch referenced an unbound name and always
  raised; None (the default of QwenVL_client) is now an empty image list.
- The blob ids on the "index" lines predate these edits and are stale.

 real_gemini/chat.py                        | 14 ++++-
 real_gemini/utils_st/get_gpt4v_response.py |  1 -
 real_gemini/utils_st/get_qwen_response.py  | 65 ++++++++++++++++++++++
 run.sh                                     |  2 +-
 4 files changed, 77 insertions(+), 5 deletions(-)
 create mode 100644 real_gemini/utils_st/get_qwen_response.py

diff --git a/real_gemini/chat.py b/real_gemini/chat.py
index ef63caf..da8d04d 100644
--- a/real_gemini/chat.py
+++ b/real_gemini/chat.py
@@ -3,7 +3,8 @@
 from pathlib import Path
 from utils_st.audio2text import audio2text_from_bytes
 from utils_st.extracte_img import get_main_img
-from utils_st.get_gpt4v_response import gpt4v,gpt4v_client
+from utils_st.get_gpt4v_response import gpt4v_client
+from utils_st.get_qwen_response import QwenVL_client
 from utils_st.text2audio import text2audio,autoplay_audio
 from utils_st.record_video import record
 from queue import Queue
@@ -11,6 +12,9 @@
 import cv2
 from threading import Thread,Event
 
+img = {'assistant':'./source/bot.png','user':None}
+res_ = {'Qwen-vl':QwenVL_client,'gpt4v':gpt4v_client}
+
 # 设置事件锁
 event_record = Event()
 event_chat = Event()
@@ -19,7 +23,9 @@
 with st.sidebar:
     with st.form('参数配置'):
         max_chat_turn = st.slider('最大对话轮数:',min_value=1,max_value=10000,value=10)
+        response_name = st.selectbox('选择模型',['Qwen-vl','gpt4v'],index=1)
         st.form_submit_button('提交配置')
+responser = res_[response_name]
 
 max_record_round = 2*max_chat_turn
 q = Queue(max_record_round)
@@ -34,7 +40,7 @@
 # in_file_audio = RECORD_DIR / f"{prefix}_input_audio.mp3"
 #########################存储录入的文件#####################
 # 对话机器人的图标
-img={'assistant':'./source/bot.png','user':None}
+
 
 if "messages" not in st.session_state:
     st.session_state.messages = []
@@ -47,6 +53,7 @@ def my_recorder():
         print(f'record {i}')
         imgs,audio = record()
         input_text,code_status,request_id = audio2text_from_bytes(audio.get_wav_data())
+        # Skip transcripts that are empty or too short to be meaningful
         if input_text and len(input_text)>5:
             q.put((imgs,audio,input_text))
         else:
@@ -97,7 +104,7 @@ def response(prompt=None,imgs=None,autoplay=True,audio_response=True):
     st.session_state.messages.append({"role": "user", "content": prompt})
     # Display assistant response in chat message container
     with st.chat_message("assistant",avatar='./source/bot.png'):
-        res = gpt4v_client(query=prompt,imgs=imgs)
+        res = responser(query=prompt,imgs=imgs)
         print('res[text]:',res['text'])
         if audio_response:
             sound,rate,byte_sound_array = text2audio(res["text"])
@@ -127,6 +134,7 @@ def response(prompt=None,imgs=None,autoplay=True,audio_response=True):
 video_show.camera_input('tt',label_visibility='hidden')
 # 开始录入输入
 if video_show.button('开始对话'):
+    st.info(f'开始监听麦克风...')
     record_thread.start()
 else:
     st.stop()
diff --git a/real_gemini/utils_st/get_gpt4v_response.py b/real_gemini/utils_st/get_gpt4v_response.py
index 45c5d01..f8b408d 100644
--- a/real_gemini/utils_st/get_gpt4v_response.py
+++ b/real_gemini/utils_st/get_gpt4v_response.py
@@ -1,5 +1,4 @@
 import base64
-import json
 import requests
 import numpy as np
 import cv2
diff --git a/real_gemini/utils_st/get_qwen_response.py b/real_gemini/utils_st/get_qwen_response.py
new file mode 100644
index 0000000..69ca56f
--- /dev/null
+++ b/real_gemini/utils_st/get_qwen_response.py
@@ -0,0 +1,65 @@
+import base64
+import requests
+import numpy as np
+import cv2
+# from real_gemini.gpt4v import GPT4V
+
+
+def _encode_image(img):
+    """Return one image as a base64 string.
+
+    A ``str`` is treated as a file path and its bytes are read as-is; anything
+    else is handed to ``cv2.imencode`` and encoded as PNG.
+    """
+    if isinstance(img,str):
+        with open(img,'rb') as f:
+            img_bytes = f.read()
+    else:
+        img_bytes = np.array(cv2.imencode('.png', img)[1]).tobytes()
+    return base64.b64encode(img_bytes).decode()
+
+
+def img2base64(imgs):
+    """Base64-encode one image or a list of images.
+
+    Always returns a list of strings; ``None`` yields an empty list.
+    """
+    if imgs is None:
+        return []
+    if not isinstance(imgs,list):
+        imgs = [imgs]
+    return [_encode_image(img) for img in imgs]
+
+
+def QwenVL_client(query,imgs=None):
+    """Send a prompt and optional images to the Qwen-VL HTTP service.
+
+    Returns ``{'text': ...}`` holding the decoded JSON response, or a fixed
+    error message when the request or JSON decoding fails.
+    """
+    input_data = {
+        'prompt':query,
+        'image_strs':img2base64(imgs)
+    }
+    api_url = 'http://192.168.80.19:6679/qwen-vl/'
+    try:
+        resp = requests.post(
+            api_url,
+            headers={
+                'Content-Type':'application/x-www-form-urlencoded',
+                'accept':'application/json'
+            },
+            data=input_data,
+        )
+        prompt_text = resp.json()
+    except Exception as e:
+        # Best effort: report the failure as text instead of raising.
+        print(e)
+        prompt_text = '千问接口请求出错了,请确认后台服务后再尝试~'
+    return {'text':prompt_text}
+
+
+if __name__ == '__main__':
+    imgs = ['/Users/wuziwei/git_project/Real-Gemini/source/bot.png']
+    r = QwenVL_client(query='描述一下这种图片',imgs=imgs)
+    print(r)
\ No newline at end of file
diff --git a/run.sh b/run.sh
index 51a9156..b2b889c 100644
--- a/run.sh
+++ b/run.sh
@@ -1,2 +1,2 @@
-export OPENAI_API_KEY='your-api-key'
+export OPENAI_API_KEY="${OPENAI_API_KEY:?set OPENAI_API_KEY in your environment first}"
 streamlit run ./real_gemini/chat.py
\ No newline at end of file