diff --git a/src/assets/img/best_visual_results.jpg b/src/assets/img/best_visual_results.jpg
new file mode 100644
index 00000000..72de2f86
Binary files /dev/null and b/src/assets/img/best_visual_results.jpg differ
diff --git a/src/assets/img/best_visual_results.png b/src/assets/img/best_visual_results.png
deleted file mode 100644
index 67565e33..00000000
Binary files a/src/assets/img/best_visual_results.png and /dev/null differ
diff --git a/src/assets/img/framework.jpg b/src/assets/img/framework.jpg
new file mode 100644
index 00000000..bf9344c2
Binary files /dev/null and b/src/assets/img/framework.jpg differ
diff --git a/src/assets/img/framework.png b/src/assets/img/framework.png
deleted file mode 100644
index 2ed98dbb..00000000
Binary files a/src/assets/img/framework.png and /dev/null differ
diff --git a/src/assets/video/ablation/exp_1.mp4 b/src/assets/video/ablation/exp_1.mp4
index 58295f22..9613eaa1 100644
Binary files a/src/assets/video/ablation/exp_1.mp4 and b/src/assets/video/ablation/exp_1.mp4 differ
diff --git a/src/assets/video/ablation/exp_2.mp4 b/src/assets/video/ablation/exp_2.mp4
index 9f631102..91607848 100644
Binary files a/src/assets/video/ablation/exp_2.mp4 and b/src/assets/video/ablation/exp_2.mp4 differ
diff --git a/src/assets/video/ablation/lip_1.mp4 b/src/assets/video/ablation/lip_1.mp4
index a8f151e9..43041254 100644
Binary files a/src/assets/video/ablation/lip_1.mp4 and b/src/assets/video/ablation/lip_1.mp4 differ
diff --git a/src/assets/video/ablation/lip_2.mp4 b/src/assets/video/ablation/lip_2.mp4
index 42b498e2..cc0a934c 100644
Binary files a/src/assets/video/ablation/lip_2.mp4 and b/src/assets/video/ablation/lip_2.mp4 differ
diff --git a/src/assets/video/ablation/pose_1.mp4 b/src/assets/video/ablation/pose_1.mp4
index 3948fd6c..c56aac82 100644
Binary files a/src/assets/video/ablation/pose_1.mp4 and b/src/assets/video/ablation/pose_1.mp4 differ
diff --git a/src/assets/video/ablation/pose_2.mp4 b/src/assets/video/ablation/pose_2.mp4
index 9a365afd..1099130b 100644
Binary files a/src/assets/video/ablation/pose_2.mp4 and b/src/assets/video/ablation/pose_2.mp4 differ
diff --git a/src/assets/video/portrait_style/3.mp4 b/src/assets/video/portrait_style/3.mp4
index e747b7f8..40d369cc 100644
Binary files a/src/assets/video/portrait_style/3.mp4 and b/src/assets/video/portrait_style/3.mp4 differ
diff --git a/src/assets/video/portrait_style/4.mp4 b/src/assets/video/portrait_style/4.mp4
index c57bea22..abe31ea9 100644
Binary files a/src/assets/video/portrait_style/4.mp4 and b/src/assets/video/portrait_style/4.mp4 differ
diff --git a/src/assets/video/singing/10.mp4 b/src/assets/video/singing/10.mp4
new file mode 100644
index 00000000..81679881
Binary files /dev/null and b/src/assets/video/singing/10.mp4 differ
diff --git a/src/assets/video/singing/4.mp4 b/src/assets/video/singing/4.mp4
new file mode 100644
index 00000000..8d070bf9
Binary files /dev/null and b/src/assets/video/singing/4.mp4 differ
diff --git a/src/assets/video/singing/5.mp4 b/src/assets/video/singing/5.mp4
new file mode 100644
index 00000000..9bc38a47
Binary files /dev/null and b/src/assets/video/singing/5.mp4 differ
diff --git a/src/assets/video/singing/6.mp4 b/src/assets/video/singing/6.mp4
new file mode 100644
index 00000000..25f5b2c1
Binary files /dev/null and b/src/assets/video/singing/6.mp4 differ
diff --git a/src/assets/video/singing/7.mp4 b/src/assets/video/singing/7.mp4
new file mode 100644
index 00000000..4acfc27c
Binary files /dev/null and b/src/assets/video/singing/7.mp4 differ
diff --git a/src/assets/video/singing/8.mp4 b/src/assets/video/singing/8.mp4
new file mode 100644
index 00000000..8d070bf9
Binary files /dev/null and b/src/assets/video/singing/8.mp4 differ
diff --git a/src/assets/video/singing/9.mp4 b/src/assets/video/singing/9.mp4
new file mode 100644
index 00000000..69605cbf
Binary files /dev/null and b/src/assets/video/singing/9.mp4 differ
diff --git a/src/index.json b/src/index.json
index 34d54aac..b6472ef8 100644
--- a/src/index.json
+++ b/src/index.json
@@ -88,96 +88,103 @@
                 "github": "https://github.com/fudan-generative-vision/hallo",
                 "huggingface": "https://huggingface.co/fudan-generative-ai/hallo"
             },
-            "mainVideo": "assets/video/cross_id/1.mp4"
+            "mainVideo": ""
         }
     },
     {
         "template": "abstract",
         "props": {
-            "figure": "assets/img/best_visual_results.png",
+            "figure": "assets/img/best_visual_results.jpg",
             "content": "The field of portrait image animation, driven by speech audio input, has experienced significant advancements in the generation of realistic and dynamic portraits. This research delves into the complexities of synchronizing facial movements and creating visually appealing, temporally consistent animations within the framework of diffusion-based methodologies. Moving away from traditional paradigms that rely on parametric models for intermediate facial representations, our innovative approach embraces the end-to-end diffusion paradigm and introduces a hierarchical audio-driven visual synthesis module to enhance the precision of alignment between audio inputs and visual outputs, encompassing lip, expression, and pose motion. Our proposed network architecture seamlessly integrates diffusion-based generative models, a UNet-based denoiser, temporal alignment techniques, and a reference network. The proposed hierarchical audio-driven visual synthesis offers adaptive control over expression and pose diversity, enabling more effective personalization tailored to different identities. Through a comprehensive evaluation that incorporates both qualitative and quantitative analyses, our approach demonstrates obvious enhancements in image and video quality, lip synchronization precision, and motion diversity."
         }
     },
     {
         "template": "framework",
         "props": {
-            "image": "assets/img/framework.png",
+            "image": "assets/img/framework.jpg",
             "description": "Specifically, we integrates a reference image containing a portrait with corresponding audio input to drive portrait animation. Optional visual synthesis weights can be used to balance lip, expression, and pose weights. ReferenceNet encodes global visual texture information for consistent and controllable character animation. Face and audio encoders generate high-fidelity portrait identity features and encode audio as motion information respectively. The module of hierarchical audio-driven visual synthesis establishes relationships between audio and visual components (lips, expression, pose), with a UNet denoiser used in the diffusion process."
         }
     },
     {
-        "template": "video-comparision",
+        "template": "video-carousel",
         "props": {
-            "title": "Ablation Study-Motion Scale Control",
-            "subtitle": "Lip Control",
+            "id": "vc1",
+            "title": "Virtual Character",
             "items": [
-                [
-                    "assets/video/ablation/lip_1.mp4",
-                    "assets/video/ablation/lip_2.mp4"
-                ]
+                "assets/video/portrait_style/2.mp4",
+                "assets/video/portrait_style/1.mp4"
             ]
         }
     },
     {
-        "template": "video-comparision",
+        "template": "video-carousel",
         "props": {
+            "id": "vc2",
             "title": "",
-            "subtitle": "Expression Control",
             "items": [
-                [
-                    "assets/video/ablation/exp_1.mp4",
-                    "assets/video/ablation/exp_2.mp4"
-                ]
+                "assets/video/portrait_style/3.mp4",
+                "assets/video/portrait_style/4.mp4"
             ]
         }
     },
     {
-        "template": "video-comparision",
+        "template": "single-video",
         "props": {
-            "title": "",
-            "subtitle": "Pose Control",
+            "title": "Real Character",
             "items": [
-                [
-                    "assets/video/ablation/pose_1.mp4",
-                    "assets/video/ablation/pose_2.mp4"
-                ]
+                "assets/video/cross_id/1.mp4",
+                "assets/video/cross_id/2.mp4"
             ]
         }
     },
     {
         "template": "single-video",
         "props": {
-            "title": "Singing Portrait",
+            "id": "mc1",
+            "title": "Motion Control (pose, expression, lip)",
+            "subtitle": "Pose Control",
             "items": [
-                "assets/video/singing/1.mp4",
-                "assets/video/singing/2.mp4"
+                "assets/video/ablation/pose_1.mp4",
+                "assets/video/ablation/pose_2.mp4"
             ]
         }
     },
     {
-        "template": "video-comparision",
+        "template": "single-video",
         "props": {
-            "title": "Portrait Style",
+            "id": "mc2",
+            "title": "",
+            "subtitle": "Expression Control",
             "items": [
-                [
-                    "assets/video/portrait_style/1.mp4",
-                    "assets/video/portrait_style/2.mp4"
-                ]
+                "assets/video/ablation/exp_1.mp4",
+                "assets/video/ablation/exp_2.mp4"
             ]
         }
     },
     {
-        "template": "video-comparision",
+        "template": "single-video",
         "props": {
+            "id": "mc3",
             "title": "",
+            "subtitle": "Lip Control",
             "items": [
-                [
-                    "assets/video/portrait_style/3.mp4",
-                    "assets/video/portrait_style/4.mp4"
-                ]
+                "assets/video/ablation/lip_1.mp4",
+                "assets/video/ablation/lip_2.mp4"
             ]
         }
     },
+    {
+        "template": "video-carousel",
+        "props": {
+            "title": "Singing",
+            "items": [
+                "assets/video/singing/6.mp4",
+                "assets/video/singing/5.mp4",
+                "assets/video/singing/8.mp4"
+            ],
+            "count": 3
+        }
+    },
     {
         "template": "video-carousel",
         "props": {