zhijun.li
committed on
Commit
·
5f6df4d
1
Parent(s):
4578e32
update app.py
Browse files
app.py
CHANGED
|
@@ -2,11 +2,10 @@ import os
|
|
| 2 |
import cv2
|
| 3 |
import time
|
| 4 |
import base64
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
from openai import OpenAI
|
| 7 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 8 |
-
import re
|
| 9 |
-
|
| 10 |
|
| 11 |
# --- Configuration ---
|
| 12 |
BASE_URL = "https://aistudio.baidu.com/llm/lmapi/v3"
|
|
@@ -63,10 +62,8 @@ def process_chunk_with_retry(client, chunk_index, frames_b64, max_retries=3):
|
|
| 63 |
return chunk_index, ""
|
| 64 |
|
| 65 |
def aggregate_and_generate_webpage(client, summaries):
|
| 66 |
-
"""
|
| 67 |
full_summary = "\n".join([f"Segment {i+1} Summary: {s}" for i, s in sorted(summaries.items()) if s])
|
| 68 |
-
|
| 69 |
-
# Prompt 稍微加强一点语气
|
| 70 |
final_prompt = f"""
|
| 71 |
You are an expert Frontend Engineer. Based on the video segment summaries, write a complete HTML file.
|
| 72 |
|
|
@@ -79,18 +76,16 @@ def aggregate_and_generate_webpage(client, summaries):
|
|
| 79 |
3. End directly with `</html>`.
|
| 80 |
4. NO introduction text, NO markdown backticks (```), NO explanations after the code.
|
| 81 |
"""
|
| 82 |
-
|
| 83 |
response = client.chat.completions.create(
|
| 84 |
model=MODEL_NAME,
|
| 85 |
messages=[{"role": "user", "content": final_prompt}],
|
| 86 |
temperature=0.2, top_p=0.8
|
| 87 |
)
|
| 88 |
-
|
| 89 |
content = response.choices[0].message.content
|
| 90 |
-
content = content.replace("```html", "").replace("```", "").strip()
|
| 91 |
|
|
|
|
|
|
|
| 92 |
match = re.search(r'(<!DOCTYPE html>.*</html>)', content, re.DOTALL | re.IGNORECASE)
|
| 93 |
-
|
| 94 |
if match:
|
| 95 |
content = match.group(1)
|
| 96 |
else:
|
|
@@ -100,8 +95,10 @@ def aggregate_and_generate_webpage(client, summaries):
|
|
| 100 |
|
| 101 |
return content
|
| 102 |
|
| 103 |
-
|
| 104 |
def main_process(video_file, progress=gr.Progress()):
|
|
|
|
|
|
|
|
|
|
| 105 |
api_key = os.environ.get("ERNIE_API_KEY")
|
| 106 |
if not api_key: raise gr.Error("Server Config Error: API KEY missing.")
|
| 107 |
if not video_file: raise gr.Error("Please upload a video.")
|
|
@@ -117,63 +114,83 @@ def main_process(video_file, progress=gr.Progress()):
|
|
| 117 |
|
| 118 |
client = OpenAI(api_key=api_key, base_url=BASE_URL)
|
| 119 |
|
| 120 |
-
progress(0.1, desc="Extracting frames...")
|
| 121 |
chunks = extract_frames(video_file)
|
| 122 |
if not chunks: raise gr.Error("Frame extraction failed.")
|
| 123 |
|
| 124 |
-
progress(0.3, desc="Analyzing content...")
|
| 125 |
chunk_summaries = {}
|
|
|
|
|
|
|
| 126 |
with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS) as executor:
|
| 127 |
future_to_chunk = {executor.submit(process_chunk_with_retry, client, i, chunk): i for i, chunk in enumerate(chunks)}
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
idx, summary = future.result()
|
| 130 |
if summary: chunk_summaries[idx] = summary
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
-
progress(0.
|
| 134 |
html_code = aggregate_and_generate_webpage(client, chunk_summaries)
|
| 135 |
|
| 136 |
-
# Save file
|
| 137 |
output_path = "generated_website.html"
|
| 138 |
with open(output_path, "w", encoding="utf-8") as f:
|
| 139 |
f.write(html_code)
|
| 140 |
|
| 141 |
-
#
|
| 142 |
-
# 将 HTML 编码为 Base64,放入 iframe 的 src 中
|
| 143 |
-
# 这样实现了完美的沙箱隔离,样式不会冲突,JS 也能正常运行
|
| 144 |
b64_html = base64.b64encode(html_code.encode('utf-8')).decode('utf-8')
|
| 145 |
data_uri = f"data:text/html;charset=utf-8;base64,{b64_html}"
|
|
|
|
| 146 |
|
| 147 |
-
|
| 148 |
-
iframe_html = f"""
|
| 149 |
-
<iframe
|
| 150 |
-
src="{data_uri}"
|
| 151 |
-
width="100%"
|
| 152 |
-
height="600px"
|
| 153 |
-
style="border: 1px solid #ccc; border-radius: 8px; background-color: white;">
|
| 154 |
-
</iframe>
|
| 155 |
-
"""
|
| 156 |
-
|
| 157 |
-
progress(1.0, desc="Done!")
|
| 158 |
-
# 返回 iframe 字符串给 HTML 组件,返回路径给下载组件,返回源码给 Code 组件
|
| 159 |
return iframe_html, output_path, html_code
|
| 160 |
|
| 161 |
# --- UI ---
|
| 162 |
|
| 163 |
with gr.Blocks(title="Ernie 4.5 Video2Code", theme=gr.themes.Soft()) as demo:
|
| 164 |
-
gr.Markdown("# 🎬 Ernie 4.5-VL: Video to Code Agent")
|
| 165 |
-
gr.Markdown("Upload a frontend video tutorial. The AI will generate and **render** the code instantly.")
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
with gr.Row():
|
| 168 |
with gr.Column(scale=1):
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
submit_btn = gr.Button("🚀 Generate & Render", variant="primary", size="lg")
|
| 171 |
|
| 172 |
with gr.Column(scale=2):
|
| 173 |
-
#
|
| 174 |
with gr.Tabs():
|
| 175 |
with gr.TabItem("🌐 Live Preview (Result)"):
|
| 176 |
-
# 这个组件现在接收的是 iframe 字符串
|
| 177 |
html_preview = gr.HTML(label="Rendered Page")
|
| 178 |
|
| 179 |
with gr.TabItem("📝 Source Code"):
|
|
|
|
| 2 |
import cv2
|
| 3 |
import time
|
| 4 |
import base64
|
| 5 |
+
import re
|
| 6 |
import gradio as gr
|
| 7 |
from openai import OpenAI
|
| 8 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# --- Configuration ---
|
| 11 |
BASE_URL = "https://aistudio.baidu.com/llm/lmapi/v3"
|
|
|
|
| 62 |
return chunk_index, ""
|
| 63 |
|
| 64 |
def aggregate_and_generate_webpage(client, summaries):
|
| 65 |
+
"""Generate final HTML."""
|
| 66 |
full_summary = "\n".join([f"Segment {i+1} Summary: {s}" for i, s in sorted(summaries.items()) if s])
|
|
|
|
|
|
|
| 67 |
final_prompt = f"""
|
| 68 |
You are an expert Frontend Engineer. Based on the video segment summaries, write a complete HTML file.
|
| 69 |
|
|
|
|
| 76 |
3. End directly with `</html>`.
|
| 77 |
4. NO introduction text, NO markdown backticks (```), NO explanations after the code.
|
| 78 |
"""
|
|
|
|
| 79 |
response = client.chat.completions.create(
|
| 80 |
model=MODEL_NAME,
|
| 81 |
messages=[{"role": "user", "content": final_prompt}],
|
| 82 |
temperature=0.2, top_p=0.8
|
| 83 |
)
|
|
|
|
| 84 |
content = response.choices[0].message.content
|
|
|
|
| 85 |
|
| 86 |
+
# Regex Cleaning
|
| 87 |
+
content = content.replace("```html", "").replace("```", "").strip()
|
| 88 |
match = re.search(r'(<!DOCTYPE html>.*</html>)', content, re.DOTALL | re.IGNORECASE)
|
|
|
|
| 89 |
if match:
|
| 90 |
content = match.group(1)
|
| 91 |
else:
|
|
|
|
| 95 |
|
| 96 |
return content
|
| 97 |
|
|
|
|
| 98 |
def main_process(video_file, progress=gr.Progress()):
|
| 99 |
+
# Clean progress bar logic: explicitly call progress()
|
| 100 |
+
progress(0, desc="Starting...")
|
| 101 |
+
|
| 102 |
api_key = os.environ.get("ERNIE_API_KEY")
|
| 103 |
if not api_key: raise gr.Error("Server Config Error: API KEY missing.")
|
| 104 |
if not video_file: raise gr.Error("Please upload a video.")
|
|
|
|
| 114 |
|
| 115 |
client = OpenAI(api_key=api_key, base_url=BASE_URL)
|
| 116 |
|
| 117 |
+
progress(0.1, desc="Step 1/3: Extracting frames...")
|
| 118 |
chunks = extract_frames(video_file)
|
| 119 |
if not chunks: raise gr.Error("Frame extraction failed.")
|
| 120 |
|
| 121 |
+
progress(0.3, desc="Step 2/3: ERNIE Analyzing content...")
|
| 122 |
chunk_summaries = {}
|
| 123 |
+
|
| 124 |
+
# Using ThreadPool without tqdm to avoid UI glitches
|
| 125 |
with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS) as executor:
|
| 126 |
future_to_chunk = {executor.submit(process_chunk_with_retry, client, i, chunk): i for i, chunk in enumerate(chunks)}
|
| 127 |
+
|
| 128 |
+
total_chunks = len(chunks)
|
| 129 |
+
completed = 0
|
| 130 |
+
|
| 131 |
+
for future in as_completed(future_to_chunk):
|
| 132 |
idx, summary = future.result()
|
| 133 |
if summary: chunk_summaries[idx] = summary
|
| 134 |
+
|
| 135 |
+
completed += 1
|
| 136 |
+
# Smooth progress update from 0.3 to 0.8
|
| 137 |
+
current_progress = 0.3 + (0.5 * (completed / total_chunks))
|
| 138 |
+
progress(current_progress, desc=f"Step 2/3: Analyzing segment {completed}/{total_chunks}")
|
| 139 |
|
| 140 |
+
progress(0.85, desc="Step 3/3: Synthesizing final code...")
|
| 141 |
html_code = aggregate_and_generate_webpage(client, chunk_summaries)
|
| 142 |
|
|
|
|
| 143 |
output_path = "generated_website.html"
|
| 144 |
with open(output_path, "w", encoding="utf-8") as f:
|
| 145 |
f.write(html_code)
|
| 146 |
|
| 147 |
+
# Create Iframe
|
|
|
|
|
|
|
| 148 |
b64_html = base64.b64encode(html_code.encode('utf-8')).decode('utf-8')
|
| 149 |
data_uri = f"data:text/html;charset=utf-8;base64,{b64_html}"
|
| 150 |
+
iframe_html = f"""<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc; border-radius: 8px; background-color: white;"></iframe>"""
|
| 151 |
|
| 152 |
+
progress(1.0, desc="Completed!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
return iframe_html, output_path, html_code
|
| 154 |
|
| 155 |
# --- UI ---
|
| 156 |
|
| 157 |
with gr.Blocks(title="Ernie 4.5 Video2Code", theme=gr.themes.Soft()) as demo:
|
|
|
|
|
|
|
| 158 |
|
| 159 |
+
# --- Header & Description (Goal 3) ---
|
| 160 |
+
gr.Markdown("# ⚡ ERNIE 4.5-VL: Video to Code Agent")
|
| 161 |
+
|
| 162 |
+
with gr.Accordion("📚 Technical Capabilities of ERNIE 4.5-VL", open=False):
|
| 163 |
+
gr.Markdown("""
|
| 164 |
+
This application is powered by **Baidu ERNIE 4.5 **, a state-of-the-art foundation model with specific enhancements for video understanding:
|
| 165 |
+
|
| 166 |
+
* **👁️ Multimodal Heterogeneous MoE**: Uses dedicated vision experts to process images and video frames without interfering with text generation capabilities.
|
| 167 |
+
* **⏳ 3D-RoPE Temporal Modeling**: Incorporates 3D Rotary Position Embeddings to independently encode temporal, width, and height information, allowing precise understanding of event sequences in videos.
|
| 168 |
+
* **📐 Adaptive Resolution**: Dynamically adjusts to different video aspect ratios, ensuring fine-grained details (like small code font on screen) are captured accurately.
|
| 169 |
+
* **🚀 Long Context Window**: Supports up to 128k context length, enabling the analysis of longer tutorials and complex logic flows.
|
| 170 |
+
""")
|
| 171 |
+
|
| 172 |
+
gr.Markdown("Upload a frontend coding tutorial video (or try the example below). The AI will watch it, understand the code, and render the result instantly.")
|
| 173 |
+
|
| 174 |
with gr.Row():
|
| 175 |
with gr.Column(scale=1):
|
| 176 |
+
# --- Input Section ---
|
| 177 |
+
video_input = gr.Video(label="Upload Video", format="mp4", height=320)
|
| 178 |
+
|
| 179 |
+
# --- Goal 1: Examples Component ---
|
| 180 |
+
# 用户点击这里的视频,会自动填充到上面的 video_input 中
|
| 181 |
+
gr.Examples(
|
| 182 |
+
examples=[["sample_demo.mp4"]], # ⚠️ 确保你上传了名为 sample_demo.mp4 的文件
|
| 183 |
+
inputs=[video_input],
|
| 184 |
+
label="▶️ Or try this example video:",
|
| 185 |
+
cache_examples=False # 关闭缓存以节省空间
|
| 186 |
+
)
|
| 187 |
+
|
| 188 |
submit_btn = gr.Button("🚀 Generate & Render", variant="primary", size="lg")
|
| 189 |
|
| 190 |
with gr.Column(scale=2):
|
| 191 |
+
# --- Output Section ---
|
| 192 |
with gr.Tabs():
|
| 193 |
with gr.TabItem("🌐 Live Preview (Result)"):
|
|
|
|
| 194 |
html_preview = gr.HTML(label="Rendered Page")
|
| 195 |
|
| 196 |
with gr.TabItem("📝 Source Code"):
|