zhijun.li committed on
Commit
5f6df4d
·
1 Parent(s): 4578e32

update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -36
app.py CHANGED
@@ -2,11 +2,10 @@ import os
2
  import cv2
3
  import time
4
  import base64
 
5
  import gradio as gr
6
  from openai import OpenAI
7
  from concurrent.futures import ThreadPoolExecutor, as_completed
8
- import re
9
-
10
 
11
  # --- Configuration ---
12
  BASE_URL = "https://aistudio.baidu.com/llm/lmapi/v3"
@@ -63,10 +62,8 @@ def process_chunk_with_retry(client, chunk_index, frames_b64, max_retries=3):
63
  return chunk_index, ""
64
 
65
  def aggregate_and_generate_webpage(client, summaries):
66
- """Aggregate summaries and generate final HTML."""
67
  full_summary = "\n".join([f"Segment {i+1} Summary: {s}" for i, s in sorted(summaries.items()) if s])
68
-
69
- # Prompt 稍微加强一点语气
70
  final_prompt = f"""
71
  You are an expert Frontend Engineer. Based on the video segment summaries, write a complete HTML file.
72
 
@@ -79,18 +76,16 @@ def aggregate_and_generate_webpage(client, summaries):
79
  3. End directly with `</html>`.
80
  4. NO introduction text, NO markdown backticks (```), NO explanations after the code.
81
  """
82
-
83
  response = client.chat.completions.create(
84
  model=MODEL_NAME,
85
  messages=[{"role": "user", "content": final_prompt}],
86
  temperature=0.2, top_p=0.8
87
  )
88
-
89
  content = response.choices[0].message.content
90
- content = content.replace("```html", "").replace("```", "").strip()
91
 
 
 
92
  match = re.search(r'(<!DOCTYPE html>.*</html>)', content, re.DOTALL | re.IGNORECASE)
93
-
94
  if match:
95
  content = match.group(1)
96
  else:
@@ -100,8 +95,10 @@ def aggregate_and_generate_webpage(client, summaries):
100
 
101
  return content
102
 
103
-
104
  def main_process(video_file, progress=gr.Progress()):
 
 
 
105
  api_key = os.environ.get("ERNIE_API_KEY")
106
  if not api_key: raise gr.Error("Server Config Error: API KEY missing.")
107
  if not video_file: raise gr.Error("Please upload a video.")
@@ -117,63 +114,83 @@ def main_process(video_file, progress=gr.Progress()):
117
 
118
  client = OpenAI(api_key=api_key, base_url=BASE_URL)
119
 
120
- progress(0.1, desc="Extracting frames...")
121
  chunks = extract_frames(video_file)
122
  if not chunks: raise gr.Error("Frame extraction failed.")
123
 
124
- progress(0.3, desc="Analyzing content...")
125
  chunk_summaries = {}
 
 
126
  with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS) as executor:
127
  future_to_chunk = {executor.submit(process_chunk_with_retry, client, i, chunk): i for i, chunk in enumerate(chunks)}
128
- for i, future in enumerate(as_completed(future_to_chunk)):
 
 
 
 
129
  idx, summary = future.result()
130
  if summary: chunk_summaries[idx] = summary
131
- progress(0.3 + 0.5 * ((i+1)/len(chunks)), desc=f"Analyzed {i+1}/{len(chunks)}")
 
 
 
 
132
 
133
- progress(0.8, desc="Synthesizing code...")
134
  html_code = aggregate_and_generate_webpage(client, chunk_summaries)
135
 
136
- # Save file
137
  output_path = "generated_website.html"
138
  with open(output_path, "w", encoding="utf-8") as f:
139
  f.write(html_code)
140
 
141
- # --- 关键修改:制作 Data URI Iframe ---
142
- # 将 HTML 编码为 Base64,放入 iframe 的 src 中
143
- # 这样实现了完美的沙箱隔离,样式不会冲突,JS 也能正常运行
144
  b64_html = base64.b64encode(html_code.encode('utf-8')).decode('utf-8')
145
  data_uri = f"data:text/html;charset=utf-8;base64,{b64_html}"
 
146
 
147
- # 创建一个 HTML 字符串,里面包含一个 iframe
148
- iframe_html = f"""
149
- <iframe
150
- src="{data_uri}"
151
- width="100%"
152
- height="600px"
153
- style="border: 1px solid #ccc; border-radius: 8px; background-color: white;">
154
- </iframe>
155
- """
156
-
157
- progress(1.0, desc="Done!")
158
- # 返回 iframe 字符串给 HTML 组件,返回路径给下载组件,返回源码给 Code 组件
159
  return iframe_html, output_path, html_code
160
 
161
  # --- UI ---
162
 
163
  with gr.Blocks(title="Ernie 4.5 Video2Code", theme=gr.themes.Soft()) as demo:
164
- gr.Markdown("# 🎬 Ernie 4.5-VL: Video to Code Agent")
165
- gr.Markdown("Upload a frontend video tutorial. The AI will generate and **render** the code instantly.")
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  with gr.Row():
168
  with gr.Column(scale=1):
169
- video_input = gr.Video(label="Upload Video", format="mp4", height=300)
 
 
 
 
 
 
 
 
 
 
 
170
  submit_btn = gr.Button("🚀 Generate & Render", variant="primary", size="lg")
171
 
172
  with gr.Column(scale=2):
173
- # 直接展示预览,不再隐藏在 Tab 里,或者设为默认 Tab
174
  with gr.Tabs():
175
  with gr.TabItem("🌐 Live Preview (Result)"):
176
- # 这个组件现在接收的是 iframe 字符串
177
  html_preview = gr.HTML(label="Rendered Page")
178
 
179
  with gr.TabItem("📝 Source Code"):
 
2
  import cv2
3
  import time
4
  import base64
5
+ import re
6
  import gradio as gr
7
  from openai import OpenAI
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
 
 
9
 
10
  # --- Configuration ---
11
  BASE_URL = "https://aistudio.baidu.com/llm/lmapi/v3"
 
62
  return chunk_index, ""
63
 
64
  def aggregate_and_generate_webpage(client, summaries):
65
+ """Generate final HTML."""
66
  full_summary = "\n".join([f"Segment {i+1} Summary: {s}" for i, s in sorted(summaries.items()) if s])
 
 
67
  final_prompt = f"""
68
  You are an expert Frontend Engineer. Based on the video segment summaries, write a complete HTML file.
69
 
 
76
  3. End directly with `</html>`.
77
  4. NO introduction text, NO markdown backticks (```), NO explanations after the code.
78
  """
 
79
  response = client.chat.completions.create(
80
  model=MODEL_NAME,
81
  messages=[{"role": "user", "content": final_prompt}],
82
  temperature=0.2, top_p=0.8
83
  )
 
84
  content = response.choices[0].message.content
 
85
 
86
+ # Regex Cleaning
87
+ content = content.replace("```html", "").replace("```", "").strip()
88
  match = re.search(r'(<!DOCTYPE html>.*</html>)', content, re.DOTALL | re.IGNORECASE)
 
89
  if match:
90
  content = match.group(1)
91
  else:
 
95
 
96
  return content
97
 
 
98
  def main_process(video_file, progress=gr.Progress()):
99
+ # Clean progress bar logic: explicitly call progress()
100
+ progress(0, desc="Starting...")
101
+
102
  api_key = os.environ.get("ERNIE_API_KEY")
103
  if not api_key: raise gr.Error("Server Config Error: API KEY missing.")
104
  if not video_file: raise gr.Error("Please upload a video.")
 
114
 
115
  client = OpenAI(api_key=api_key, base_url=BASE_URL)
116
 
117
+ progress(0.1, desc="Step 1/3: Extracting frames...")
118
  chunks = extract_frames(video_file)
119
  if not chunks: raise gr.Error("Frame extraction failed.")
120
 
121
+ progress(0.3, desc="Step 2/3: ERNIE Analyzing content...")
122
  chunk_summaries = {}
123
+
124
+ # Using ThreadPool without tqdm to avoid UI glitches
125
  with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS) as executor:
126
  future_to_chunk = {executor.submit(process_chunk_with_retry, client, i, chunk): i for i, chunk in enumerate(chunks)}
127
+
128
+ total_chunks = len(chunks)
129
+ completed = 0
130
+
131
+ for future in as_completed(future_to_chunk):
132
  idx, summary = future.result()
133
  if summary: chunk_summaries[idx] = summary
134
+
135
+ completed += 1
136
+ # Smooth progress update from 0.3 to 0.8
137
+ current_progress = 0.3 + (0.5 * (completed / total_chunks))
138
+ progress(current_progress, desc=f"Step 2/3: Analyzing segment {completed}/{total_chunks}")
139
 
140
+ progress(0.85, desc="Step 3/3: Synthesizing final code...")
141
  html_code = aggregate_and_generate_webpage(client, chunk_summaries)
142
 
 
143
  output_path = "generated_website.html"
144
  with open(output_path, "w", encoding="utf-8") as f:
145
  f.write(html_code)
146
 
147
+ # Create Iframe
 
 
148
  b64_html = base64.b64encode(html_code.encode('utf-8')).decode('utf-8')
149
  data_uri = f"data:text/html;charset=utf-8;base64,{b64_html}"
150
+ iframe_html = f"""<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc; border-radius: 8px; background-color: white;"></iframe>"""
151
 
152
+ progress(1.0, desc="Completed!")
 
 
 
 
 
 
 
 
 
 
 
153
  return iframe_html, output_path, html_code
154
 
155
  # --- UI ---
156
 
157
  with gr.Blocks(title="Ernie 4.5 Video2Code", theme=gr.themes.Soft()) as demo:
 
 
158
 
159
+ # --- Header & Description (Goal 3) ---
160
+ gr.Markdown("# ⚡ ERNIE 4.5-VL: Video to Code Agent")
161
+
162
+ with gr.Accordion("📚 Technical Capabilities of ERNIE 4.5-VL", open=False):
163
+ gr.Markdown("""
164
+ This application is powered by **Baidu ERNIE 4.5**, a state-of-the-art foundation model with specific enhancements for video understanding:
165
+
166
+ * **👁️ Multimodal Heterogeneous MoE**: Uses dedicated vision experts to process images and video frames without interfering with text generation capabilities.
167
+ * **⏳ 3D-RoPE Temporal Modeling**: Incorporates 3D Rotary Position Embeddings to independently encode temporal, width, and height information, allowing precise understanding of event sequences in videos.
168
+ * **📐 Adaptive Resolution**: Dynamically adjusts to different video aspect ratios, ensuring fine-grained details (like small code font on screen) are captured accurately.
169
+ * **🚀 Long Context Window**: Supports up to 128k context length, enabling the analysis of longer tutorials and complex logic flows.
170
+ """)
171
+
172
+ gr.Markdown("Upload a frontend coding tutorial video (or try the example below). The AI will watch it, understand the code, and render the result instantly.")
173
+
174
  with gr.Row():
175
  with gr.Column(scale=1):
176
+ # --- Input Section ---
177
+ video_input = gr.Video(label="Upload Video", format="mp4", height=320)
178
+
179
+ # --- Goal 1: Examples Component ---
180
+ # 用户点击这里的视频,会自动填充到上面的 video_input 中
181
+ gr.Examples(
182
+ examples=[["sample_demo.mp4"]], # ⚠️ 确保你上传了名为 sample_demo.mp4 的文件
183
+ inputs=[video_input],
184
+ label="▶️ Or try this example video:",
185
+ cache_examples=False # 关闭缓存以节省空间
186
+ )
187
+
188
  submit_btn = gr.Button("🚀 Generate & Render", variant="primary", size="lg")
189
 
190
  with gr.Column(scale=2):
191
+ # --- Output Section ---
192
  with gr.Tabs():
193
  with gr.TabItem("🌐 Live Preview (Result)"):
 
194
  html_preview = gr.HTML(label="Rendered Page")
195
 
196
  with gr.TabItem("📝 Source Code"):