Merge pull request #6 from airaria/add-webcam-support

add webcam support
airaria · Jul 18, 2023 · d83488a · d83488a
2 parents dfb51ca + f1e0d6c
commit d83488a
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -36,6 +36,9 @@
 
 [中文LLaMA&Alpaca大模型](https://github.com/ymcui/Chinese-LLaMA-Alpaca/tree/main) | [多模态VLE](https://github.com/iflytek/VLE) | [中文MiniRBT](https://github.com/iflytek/MiniRBT) | [中文LERT](https://github.com/ymcui/LERT) | [中英文PERT](https://github.com/ymcui/PERT) | [中文MacBERT](https://github.com/ymcui/MacBERT) | [中文ELECTRA](https://github.com/ymcui/Chinese-ELECTRA) | [中文XLNet](https://github.com/ymcui/Chinese-XLNet) | [中文BERT](https://github.com/ymcui/Chinese-BERT-wwm) | [知识蒸馏工具TextBrewer](https://github.com/airaria/TextBrewer) | [模型裁剪工具TextPruner](https://github.com/airaria/TextPruner)
 
+## 新闻 <!-- omit from toc --> 
+
+[2023/07/18] 为demo添加了Webcam交互方式，可以直接从摄像头拍摄照片
 
 ## 内容导引 <!-- omit from toc --> 
 

diff --git a/README_EN.md b/README_EN.md
@@ -37,6 +37,10 @@ The main features of this project include:
 
 [Chinese-LLaMA-Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) | [Multi-modal VLE](https://github.com/iflytek/VLE) | [Chinese MiniRBT](https://github.com/iflytek/MiniRBT) | [Chinese LERT](https://github.com/ymcui/LERT) | [Chinese-English PERT](https://github.com/ymcui/PERT) | [Chinese MacBERT](https://github.com/ymcui/MacBERT) | [Chinese ELECTRA](https://github.com/ymcui/Chinese-ELECTRA) | [Chinese XLNet](https://github.com/ymcui/Chinese-XLNet) | [Chinese BERT](https://github.com/ymcui/Chinese-BERT-wwm) | [Knowledge distillation tool TextBrewer](https://github.com/airaria/TextBrewer) | [Model pruning tool TextPruner](https://github.com/airaria/TextPruner)
 
+## News <!-- omit from toc --> 
+
+[2023/07/18] Added webcam support to the Gradio demo: users can take photos with their webcam and upload them directly
+
 ## Table of Contents <!-- omit from toc --> 
 
 - [Intruction](#intruction)

diff --git a/scripts/inference/gradio_demo.py b/scripts/inference/gradio_demo.py
@@ -73,7 +73,15 @@ def parse_text(text):
     return text
 
 
-def predict(input_text, image_path, chatbot, max_new_tokens, top_p, top_k, temperature, history):
+def predict(input_text, image_path_upload, image_path_webcam, chatbot, max_new_tokens, top_p, top_k, temperature, history, selected):
+    if selected=='Upload':
+        image_path = image_path_upload
+        print("Image from upload: ", image_path_upload)
+    elif selected=='Webcam':
+        image_path = image_path_webcam
+        print("Image from webcam: ", image_path_webcam)
+    else:
+        raise ValueError(selected)
     DEFAULT_GENERATION_CONFIG.top_p = top_p
     DEFAULT_GENERATION_CONFIG.top_k = top_k
     DEFAULT_GENERATION_CONFIG.max_new_tokens = max_new_tokens
@@ -125,6 +133,11 @@ def main():
     model.eval()
 
     with gr.Blocks(theme=gr.themes.Default()) as demo:
+
+        selected_state = gr.State("Upload")
+        def on_select(evt: gr.SelectData):  # SelectData is a subclass of EventData
+            return evt.value
+
         github_banner_path = 'https://raw.githubusercontent.com/airaria/Visual-Chinese-LLaMA-Alpaca/main/pics/banner.png'
         gr.HTML(f'<p align="center"><a href="https://github.com/airaria/Visual-Chinese-LLaMA-Alpaca"><img src={github_banner_path} width="700"/></a></p>')
         with gr.Row():
@@ -136,19 +149,25 @@ def main():
                         submitBtn = gr.Button("提交", variant="primary")
                         emptyBtn = gr.Button("清除")
             with gr.Column(scale=2.5):
-                    image_path = gr.Image(type="pil", label="Image", value=None).style(height=310)
+                    with gr.Tab("Upload") as t1:
+                        image_path_upload = gr.Image(type="pil", label="Image", value=None).style(height=310)
+                        t1.select(on_select,outputs=selected_state)
+                    with gr.Tab("Webcam") as t2:
+                        image_path_webcam = gr.Image(type="pil", label="Image", value=None, source='webcam')
+                        t2.select(on_select, outputs=selected_state)
                     max_new_tokens = gr.Slider(0, 1024, value=512, step=1.0, label="Max new tokens", interactive=True)
                     top_p = gr.Slider(0, 1, value=0.9, step=0.01, label="Top P", interactive=True)
                     top_k = gr.Slider(0, 100, value=40, step=1, label="Top K", interactive=True)
                     temperature = gr.Slider(0, 1, value=0.5, step=0.01, label="Temperature", interactive=True)
 
         history = gr.State([])
 
-        submitBtn.click(predict, [user_input, image_path, chatbot, max_new_tokens, top_p, top_k, temperature, history], [chatbot, history],
+        submitBtn.click(predict, [user_input, image_path_upload, image_path_webcam, chatbot, max_new_tokens, top_p, top_k, temperature, history, selected_state], [chatbot, history],
                         show_progress=True)
-        image_path.clear(reset_state, outputs=[image_path, chatbot, history], show_progress=True)
+        image_path_upload.clear(reset_state, outputs=[image_path_upload, chatbot, history], show_progress=True)
+        image_path_webcam.clear(reset_state, outputs=[image_path_webcam, chatbot, history], show_progress=True)
         submitBtn.click(reset_user_input, [], [user_input])
-        emptyBtn.click(reset_state, outputs=[image_path, chatbot, history], show_progress=True)
+        emptyBtn.click(lambda: (None, None, [], []), outputs=[image_path_upload, image_path_webcam, chatbot, history], show_progress=True)
 
         print(gr.__version__)