Spaces:
Sleeping
Sleeping
Luis Chaves Rodriguez
added dev make statement pinning python version and link to diabetes df dataset
17fec11
| # /// script | |
| # requires-python = ">=3.12" | |
| # dependencies = [ | |
| # "ipython==9.0.2", | |
| # "ipywidgets==8.1.5", | |
| # "marimo", | |
| # "numpy==2.1.3", | |
| # "pandas==2.2.3", | |
| # "scikit-learn==1.6.1", | |
| # "setuptools==78.1.0", | |
| # "ydata-profiling==4.16.1", | |
| # ] | |
| # /// | |
| import marimo | |
# Version stamp written into the notebook file (presumably the marimo release
# that generated it — confirm before editing by hand).
__generated_with = "0.12.9"
# Notebook application object that the entry point at the bottom of the file
# runs; created with the "medium" width setting.
app = marimo.App(width="medium")
def _():
    # Import every library the notebook uses. The heavier imports happen
    # under a status spinner so the page shows progress while they load.
    import marimo as mo

    _loading = mo.status.spinner("Importing libraries...")
    with _loading:
        import numpy as np
        import pandas as pd
        from ydata_profiling import ProfileReport
        from sklearn import datasets
    return ProfileReport, datasets, mo, np, pd
def _(mo):
    # Introductory Markdown shown at the top of the app: what the tool does
    # and a privacy note. The heading, body and note are separated by blank
    # lines so each renders as its own Markdown block.
    mo.md(
        r"""
        # On-the-fly YData Profiling

        It is often desirable to have a quick EDA report when you receive a fresh CSV. [YData Profiling](https://docs.profiling.ydata.ai/latest/) is a useful tool to streamline that task. This small [marimo](https://marimo.io) app enables you to drag-and-drop a CSV and generate an HTML report that you can visualise in your browser or download as an HTML report.

        **Note: No data gets stored when you use this service**
        """
    )
    return
def _(mo, try_default_df):
    # Offer the CSV drop area only while the default-dataset switch is off.
    # With the switch on, uploaded_file is None and nothing is rendered here.
    if try_default_df.value:
        uploaded_file = None
    else:
        uploaded_file = mo.ui.file(
            filetypes=[".csv"],
            kind="area",
            label="Drag and drop a CSV file here, or click to open file browser",
        )
    uploaded_file
    return (uploaded_file,)
def _(mo):
    # Switch that lets the user profile scikit-learn's diabetes dataset
    # instead of uploading a CSV of their own.
    _switch_label = "Try default dataset ([`diabetes` from scikit-learn](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset))"
    try_default_df = mo.ui.switch(label=_switch_label)
    try_default_df
    return (try_default_df,)
def _(mo):
    # Checkbox, ticked by default, whose value is forwarded as the report's
    # `minimal` option when the profile is built.
    minimal = mo.ui.checkbox(value=True, label="Minimal profiling")
    minimal
    return (minimal,)
def _(datasets, mo, pd, try_default_df, uploaded_file):
    # Build the dataframe to profile, either from scikit-learn's diabetes
    # dataset (switch on) or from the uploaded CSV (switch off). The cell is
    # halted with a callout until one of the two sources is available.
    #
    # Returns (df, diabetes, should_stop); diabetes is None on the CSV path.
    import io as _io  # underscore alias: marimo keeps such names cell-local

    # uploaded_file is None while the switch is on, so the switch is tested
    # first and the short-circuit keeps `.value` from being touched on None.
    should_stop = not try_default_df.value and not uploaded_file.value
    mo.stop(should_stop, mo.callout("⬆️ Please choose a source dataframe above!"))

    # Bound on both paths so the return below never hits an unbound name.
    diabetes = None
    if try_default_df.value:
        diabetes = datasets.load_diabetes()
        df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
    else:
        # The upload's contents are passed through a BytesIO buffer because
        # read_csv needs a path or a file-like object, not the raw payload.
        df = pd.read_csv(_io.BytesIO(uploaded_file.value[0].contents))
    return df, diabetes, should_stop
def _(mo, profile):
    # Download button that serves the profile's HTML as "ydata.html".
    _button_options = {
        "data": profile.html,
        "filename": "ydata.html",
        "mimetype": "text/html",
        "label": "Download YData Profile",
    }
    mo.download(**_button_options)
    return
def _(ProfileReport, df, minimal, mo):
    # Build the profiling report for df, honouring the "Minimal profiling"
    # checkbox, and embed it in the page while a status spinner is shown.
    # NOTE(review): the original nesting under `with` was lost in extraction;
    # both statements are assumed to run inside the spinner — confirm.
    _report_title = "YData Profiling Report"
    with mo.status.spinner("Generating YData Profile report..."):
        profile = ProfileReport(df, minimal=minimal.value, title=_report_title)
        profile.to_notebook_iframe()
    return (profile,)
# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()