Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
# Leaderboard schema, left to right. Every row of STATIC_DATA supplies one
# value per entry here, in this order.
COLUMNS = [
    "Method",
    "#Param.",
    "Input Type",
    "Control Type",
    "Model Type",
    "Mean Traj. β",
    "Acc. β",
]

# Hard-coded leaderboard rows, grouped below by their "Model Type" value.
# The two metric cells hold floats when a result exists and a placeholder
# string otherwise.
# NOTE(review): the "β" glyph is used in several distinct roles in this file
# (suffix on both metric column names, missing-value marker, suffix on the
# post-train method names) — this looks like mis-encoded symbols; confirm
# against the upstream source before changing any of these strings.
STATIC_DATA = [
    # Model Type: VLM
    ["w/o WM", "72B", "RGB", "β", "VLM", 6.24, 50.27],

    # Model Type: Image Gen.
    ["PathDreamer [36]", "0.69B", "RGB-D; Sem; Pano", "Viewpoint", "Image Gen.", 5.28, 56.99],
    ["SE3DS [11]", "1.1B", "RGB-D; Pano", "Viewpoint", "Image Gen.", 5.29, 57.53],

    # Model Type: Video Gen.
    ["NWM [25]", "1B", "RGB", "Trajectory", "Video Gen.", 5.68, 57.35],
    ["SVD [6]", "1.5B", "RGB", "Image", "Video Gen.", 5.29, 57.71],
    ["LTX-Video [5]", "2B", "RGB", "Text", "Video Gen.", 5.37, 56.08],
    ["Hunyuan [4]", "13B", "RGB", "Text", "Video Gen.", 5.21, 57.71],
    ["Wan2.1 [23]", "14B", "RGB", "Text", "Video Gen.", 5.24, 58.26],
    ["Cosmos [1]", "2B", "RGB", "Text", "Video Gen.", 5.898, 52.27],
    ["Runway", "β", "β", "Text", "Video Gen.", "β", "β"],

    # Model Type: Video Gen. Post-Train
    ["SVDβ [6]", "1.5B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.02, 60.98],
    ["LTXβ [5]", "2B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.49, 57.53],
    ["WAN2.1β [23]", "14B", "RGB; Pano", "Action", "Video Gen. Post-Train", "XXX", "XXX"],
    ["Cosmosβ [1]", "2B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.08, 60.25],
]
def create_leaderboard():
    """Build the leaderboard table ordered by accuracy, best first.

    Returns:
        pandas.DataFrame: the rows of ``STATIC_DATA`` under the ``COLUMNS``
        headers, sorted by the "Acc. β" column in descending order. Rows
        whose accuracy cell is not numeric (placeholder strings) are placed
        at the bottom. Cell values are returned exactly as stored in
        ``STATIC_DATA``; only the row order changes, and the index is reset
        to 0..n-1.
    """
    df = pd.DataFrame(STATIC_DATA, columns=COLUMNS)

    # Placeholder cells are strings; coerce them to NaN so they take no part
    # in the numeric ordering instead of substituting an in-band sentinel.
    accuracy = pd.to_numeric(df['Acc. β'], errors='coerce')

    # A stable sort keeps rows with equal accuracy (and the non-numeric rows)
    # in their STATIC_DATA order, so the displayed ranking is deterministic.
    ranked = accuracy.sort_values(ascending=False, kind='stable', na_position='last')

    # Select by label (.loc): ranked.index holds index labels, not positions.
    return df.loc[ranked.index].reset_index(drop=True)
# Title text used for both the page title and the on-page heading.
_PAGE_TITLE = "World-in-World: Building a Closed-Loop World Interface to Evaluate World Models"

# Display type per column, in COLUMNS order: five text columns followed by
# the two metric columns.
_COLUMN_DATATYPES = ["str"] * 5 + ["number"] * 2

# Assemble the two-tab page; `demo` is the app object launched at the bottom.
with gr.Blocks(title=_PAGE_TITLE, theme=gr.themes.Soft()) as demo:
    gr.HTML(
        "<h1 style='text-align: center; margin-bottom: 1rem'>"
        f"π {_PAGE_TITLE}"
        "</h1>"
    )

    with gr.Tabs():
        with gr.TabItem("π Leaderboard"):
            # Read-only table; its contents are computed once, when the
            # page layout is built.
            leaderboard_table = gr.DataFrame(
                value=create_leaderboard(),
                headers=COLUMNS,
                datatype=_COLUMN_DATATYPES,
                wrap=True,
                interactive=False,
            )

        with gr.TabItem("π About"):
            # Static explanatory text: model categories, metrics, and the
            # legend for the markers used in the table.
            gr.Markdown("""
            # World-in-World: Building a Closed-Loop World Interface to Evaluate World Models
            This leaderboard showcases performance metrics across different types of AI models in world modeling tasks:
            ## Model Categories
            - **VLM**: Vision-Language Models
            - **Image Gen.**: Image Generation Models
            - **Video Gen.**: Video Generation Models
            - **Video Gen. Post-Train**: Post-training specialized Video Generation Models
            ## Metrics Explained
            - **Acc. β**: Accuracy score (higher values indicate better performance)
            - **Mean Traj. β**: Mean trajectory error (lower values indicate better performance)
            ## Notes
            - β indicates post-training specialized models
            - XXX indicates results pending/unavailable
            - β indicates not applicable or not available
            *Results represent performance on world modeling evaluation benchmarks and may vary across different evaluation settings.*
            """)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()