Merge branch 'main' of https://github.com/airaria/Visual-Chinese-LLaMA-Alpaca
airaria · Jul 19, 2023 · c4929c3 · c4929c3
2 parents 30cabd8 + 774d6df
commit c4929c3
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -36,6 +36,9 @@
 
 [**中文LLaMA-2&Alpaca-2大模型**](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2) | [中文LLaMA&Alpaca大模型](https://github.com/ymcui/Chinese-LLaMA-Alpaca/tree/main) | [多模态VLE](https://github.com/iflytek/VLE) | [中文MiniRBT](https://github.com/iflytek/MiniRBT) | [中文LERT](https://github.com/ymcui/LERT) | [中英文PERT](https://github.com/ymcui/PERT) | [中文MacBERT](https://github.com/ymcui/MacBERT) | [中文ELECTRA](https://github.com/ymcui/Chinese-ELECTRA) | [中文XLNet](https://github.com/ymcui/Chinese-XLNet) | [中文BERT](https://github.com/ymcui/Chinese-BERT-wwm) | [知识蒸馏工具TextBrewer](https://github.com/airaria/TextBrewer) | [模型裁剪工具TextPruner](https://github.com/airaria/TextPruner)
 
+## 新闻 <!-- omit from toc --> 
+
+**[2023/07/18] Demo添加了Webcam支持，可以直接从摄像头拍摄照片**
 
 ## 内容导引 <!-- omit from toc --> 
 
@@ -360,4 +363,4 @@ python scripts/inference/gradio_demo.py --visualcla_model visualcla_model --load
 
 **本项目相关资源仅供学术研究之用，严禁用于商业用途。** 使用涉及第三方代码的部分时，请严格遵循相应的开源协议。模型生成的内容受模型计算、随机性和量化精度损失等因素影响，本项目不对其准确性作出保证。对于模型输出的任何内容，本项目不承担任何法律责任，亦不对因使用相关资源和输出结果而可能产生的任何损失承担责任。
 
-本项目由个人及协作者业余时间发起并维护，因此无法保证能及时回复解决相应问题。
+本项目由个人及协作者业余时间发起并维护，因此无法保证能及时回复解决相应问题。
diff --git a/README_EN.md b/README_EN.md
@@ -37,6 +37,10 @@ The main features of this project include:
 
 [**Chinese-LLaMA-Alpaca-2**](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2) | [Chinese-LLaMA-Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) | [Multi-modal VLE](https://github.com/iflytek/VLE) | [Chinese MiniRBT](https://github.com/iflytek/MiniRBT) | [Chinese LERT](https://github.com/ymcui/LERT) | [Chinese-English PERT](https://github.com/ymcui/PERT) | [Chinese MacBERT](https://github.com/ymcui/MacBERT) | [Chinese ELECTRA](https://github.com/ymcui/Chinese-ELECTRA) | [Chinese XLNet](https://github.com/ymcui/Chinese-XLNet) | [Chinese BERT](https://github.com/ymcui/Chinese-BERT-wwm) | [Knowledge distillation tool TextBrewer](https://github.com/airaria/TextBrewer) | [Model pruning tool TextPruner](https://github.com/airaria/TextPruner)
 
+## News <!-- omit from toc --> 
+
+[2023/07/18] Added webcam support to the Gradio demo: users can now take photos with their webcam and use them directly as input images
+
 ## Table of Contents <!-- omit from toc --> 
 
 - [Intruction](#intruction)

diff --git a/scripts/inference/gradio_demo.py b/scripts/inference/gradio_demo.py
@@ -73,7 +73,15 @@ def parse_text(text):
     return text
 
 
-def predict(input_text, image_path, chatbot, max_new_tokens, top_p, top_k, temperature, history):
+def predict(input_text, image_path_upload, image_path_webcam, chatbot, max_new_tokens, top_p, top_k, temperature, history, selected):
+    if selected=='Upload':
+        image_path = image_path_upload
+        print("Image from upload: ", image_path_upload)
+    elif selected=='Webcam':
+        image_path = image_path_webcam
+        print("Image from webcam: ", image_path_webcam)
+    else:
+        raise ValueError(selected)
     DEFAULT_GENERATION_CONFIG.top_p = top_p
     DEFAULT_GENERATION_CONFIG.top_k = top_k
     DEFAULT_GENERATION_CONFIG.max_new_tokens = max_new_tokens
@@ -125,6 +133,11 @@ def main():
     model.eval()
 
     with gr.Blocks(theme=gr.themes.Default()) as demo:
+
+        selected_state = gr.State("Upload")
+        def on_select(evt: gr.SelectData):  # SelectData is a subclass of EventData
+            return evt.value
+
         github_banner_path = 'https://raw.githubusercontent.com/airaria/Visual-Chinese-LLaMA-Alpaca/main/pics/banner.png'
         gr.HTML(f'<p align="center"><a href="https://github.com/airaria/Visual-Chinese-LLaMA-Alpaca"><img src={github_banner_path} width="700"/></a></p>')
         with gr.Row():
@@ -136,19 +149,25 @@ def main():
                         submitBtn = gr.Button("提交", variant="primary")
                         emptyBtn = gr.Button("清除")
             with gr.Column(scale=2.5):
-                    image_path = gr.Image(type="pil", label="Image", value=None).style(height=310)
+                    with gr.Tab("Upload") as t1:
+                        image_path_upload = gr.Image(type="pil", label="Image", value=None).style(height=310)
+                        t1.select(on_select,outputs=selected_state)
+                    with gr.Tab("Webcam") as t2:
+                        image_path_webcam = gr.Image(type="pil", label="Image", value=None, source='webcam')
+                        t2.select(on_select, outputs=selected_state)
                     max_new_tokens = gr.Slider(0, 1024, value=512, step=1.0, label="Max new tokens", interactive=True)
                     top_p = gr.Slider(0, 1, value=0.9, step=0.01, label="Top P", interactive=True)
                     top_k = gr.Slider(0, 100, value=40, step=1, label="Top K", interactive=True)
                     temperature = gr.Slider(0, 1, value=0.5, step=0.01, label="Temperature", interactive=True)
 
         history = gr.State([])
 
-        submitBtn.click(predict, [user_input, image_path, chatbot, max_new_tokens, top_p, top_k, temperature, history], [chatbot, history],
+        submitBtn.click(predict, [user_input, image_path_upload, image_path_webcam, chatbot, max_new_tokens, top_p, top_k, temperature, history, selected_state], [chatbot, history],
                         show_progress=True)
-        image_path.clear(reset_state, outputs=[image_path, chatbot, history], show_progress=True)
+        image_path_upload.clear(reset_state, outputs=[image_path_upload, chatbot, history], show_progress=True)
+        image_path_webcam.clear(reset_state, outputs=[image_path_webcam, chatbot, history], show_progress=True)
         submitBtn.click(reset_user_input, [], [user_input])
-        emptyBtn.click(reset_state, outputs=[image_path, chatbot, history], show_progress=True)
+        emptyBtn.click(lambda: (None, None, [], []), outputs=[image_path_upload, image_path_webcam, chatbot, history], show_progress=True)
 
         print(gr.__version__)