ocr_rl / trainer_state.json
MohamedZayton's picture
Upload folder using huggingface_hub
7c7e92c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.02706359945872801,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5125,
"completions/max_length": 438.6,
"completions/max_terminated_length": 181.6,
"completions/mean_length": 422.075,
"completions/mean_terminated_length": 165.6982177734375,
"completions/min_length": 406.4,
"completions/min_terminated_length": 150.4,
"entropy": 0.45910371616482737,
"epoch": 0.0027063599458728013,
"frac_reward_zero_std": 0.0,
"grad_norm": 0.09368716925382614,
"learning_rate": 9.975642760487146e-06,
"loss": 0.0087,
"num_tokens": 78806.0,
"reward": 0.19168390333652496,
"reward_std": 0.042040593549609186,
"rewards/accuracy_reward/mean": 0.19168390333652496,
"rewards/accuracy_reward/std": 0.04204059485346079,
"step": 10
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.45,
"completions/max_length": 322.8,
"completions/max_terminated_length": 162.1,
"completions/mean_length": 313.925,
"completions/mean_terminated_length": 152.84583435058593,
"completions/min_length": 298.5,
"completions/min_terminated_length": 144.9,
"entropy": 0.4505070824176073,
"epoch": 0.005412719891745603,
"frac_reward_zero_std": 0.0,
"grad_norm": 0.047858916223049164,
"learning_rate": 9.948579161028418e-06,
"loss": -0.0106,
"num_tokens": 143360.0,
"reward": 0.17426907550543547,
"reward_std": 0.05201733000576496,
"rewards/accuracy_reward/mean": 0.17426907550543547,
"rewards/accuracy_reward/std": 0.05201732954010367,
"step": 20
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.2875,
"completions/max_length": 304.4,
"completions/max_terminated_length": 197.6,
"completions/mean_length": 278.7875,
"completions/mean_terminated_length": 166.95833435058594,
"completions/min_length": 248.0,
"completions/min_terminated_length": 145.6,
"entropy": 0.6422165723517537,
"epoch": 0.008119079837618403,
"frac_reward_zero_std": 0.1,
"grad_norm": 0.04068256542086601,
"learning_rate": 9.92151556156969e-06,
"loss": 0.0031,
"num_tokens": 210607.0,
"reward": 0.2006674014031887,
"reward_std": 0.07132247723639011,
"rewards/accuracy_reward/mean": 0.2006674014031887,
"rewards/accuracy_reward/std": 0.0713224794715643,
"step": 30
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5125,
"completions/max_length": 363.5,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 335.375,
"completions/mean_terminated_length": 76.44464416503907,
"completions/min_length": 324.8,
"completions/min_terminated_length": 68.8,
"entropy": 0.46362581551074983,
"epoch": 0.010825439783491205,
"frac_reward_zero_std": 0.0,
"grad_norm": 0.04135835915803909,
"learning_rate": 9.894451962110961e-06,
"loss": 0.0425,
"num_tokens": 286613.0,
"reward": 0.16309105940163135,
"reward_std": 0.06764648640528322,
"rewards/accuracy_reward/mean": 0.16309105940163135,
"rewards/accuracy_reward/std": 0.06764648780226708,
"step": 40
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 345.9,
"completions/max_terminated_length": 137.4,
"completions/mean_length": 341.675,
"completions/mean_terminated_length": 133.05,
"completions/min_length": 332.6,
"completions/min_terminated_length": 127.8,
"entropy": 0.5505448803305626,
"epoch": 0.013531799729364006,
"frac_reward_zero_std": 0.1,
"grad_norm": 0.05425691232085228,
"learning_rate": 9.867388362652234e-06,
"loss": 0.0032,
"num_tokens": 372011.0,
"reward": 0.18142173625528812,
"reward_std": 0.046853833552449944,
"rewards/accuracy_reward/mean": 0.18142173625528812,
"rewards/accuracy_reward/std": 0.04685383513569832,
"step": 50
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5375,
"completions/max_length": 419.1,
"completions/max_terminated_length": 162.8,
"completions/mean_length": 400.8875,
"completions/mean_terminated_length": 144.52000122070314,
"completions/min_length": 384.8,
"completions/min_terminated_length": 128.8,
"entropy": 0.4113187978975475,
"epoch": 0.016238159675236806,
"frac_reward_zero_std": 0.1,
"grad_norm": 0.03674859553575516,
"learning_rate": 9.840324763193504e-06,
"loss": 0.0156,
"num_tokens": 451810.0,
"reward": 0.21405077129602432,
"reward_std": 0.04568238444626331,
"rewards/accuracy_reward/mean": 0.21405077129602432,
"rewards/accuracy_reward/std": 0.0456823855638504,
"step": 60
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5875,
"completions/max_length": 387.3,
"completions/max_terminated_length": 170.3,
"completions/mean_length": 365.6,
"completions/mean_terminated_length": 134.99667053222657,
"completions/min_length": 293.5,
"completions/min_terminated_length": 88.7,
"entropy": 0.6454691726714372,
"epoch": 0.018944519621109608,
"frac_reward_zero_std": 0.0,
"grad_norm": 0.03896205499768257,
"learning_rate": 9.813261163734777e-06,
"loss": -0.0027,
"num_tokens": 522034.0,
"reward": 0.16922515165060759,
"reward_std": 0.07712158742360771,
"rewards/accuracy_reward/mean": 0.16922515165060759,
"rewards/accuracy_reward/std": 0.07712158723734319,
"step": 70
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3875,
"completions/max_length": 379.3,
"completions/max_terminated_length": 224.1,
"completions/mean_length": 365.6125,
"completions/mean_terminated_length": 210.74881591796876,
"completions/min_length": 342.2,
"completions/min_terminated_length": 188.6,
"entropy": 0.31367158964276315,
"epoch": 0.02165087956698241,
"frac_reward_zero_std": 0.0,
"grad_norm": 0.05739288777112961,
"learning_rate": 9.78619756427605e-06,
"loss": -0.0015,
"num_tokens": 602675.0,
"reward": 0.39022472202777864,
"reward_std": 0.06431488357484341,
"rewards/accuracy_reward/mean": 0.39022472202777864,
"rewards/accuracy_reward/std": 0.06431488748639821,
"step": 80
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.175,
"completions/max_length": 275.6,
"completions/max_terminated_length": 222.9,
"completions/mean_length": 259.5,
"completions/mean_terminated_length": 206.6125,
"completions/min_length": 241.8,
"completions/min_terminated_length": 190.6,
"entropy": 0.3430396350100636,
"epoch": 0.02435723951285521,
"frac_reward_zero_std": 0.0,
"grad_norm": 0.01871815323829651,
"learning_rate": 9.759133964817322e-06,
"loss": 0.0226,
"num_tokens": 664643.0,
"reward": 0.34078182056546213,
"reward_std": 0.07903781468048691,
"rewards/accuracy_reward/mean": 0.34078182056546213,
"rewards/accuracy_reward/std": 0.07903781542554497,
"step": 90
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5875,
"completions/max_length": 413.8,
"completions/max_terminated_length": 148.1,
"completions/mean_length": 403.2,
"completions/mean_terminated_length": 138.7125,
"completions/min_length": 384.9,
"completions/min_terminated_length": 128.9,
"entropy": 0.4610064772889018,
"epoch": 0.02706359945872801,
"frac_reward_zero_std": 0.0,
"grad_norm": 0.04077404364943504,
"learning_rate": 9.732070365358594e-06,
"loss": 0.0033,
"num_tokens": 734635.0,
"reward": 0.18911832235753537,
"reward_std": 0.05935304025188089,
"rewards/accuracy_reward/mean": 0.18911832235753537,
"rewards/accuracy_reward/std": 0.05935304341837764,
"step": 100
}
],
"logging_steps": 10,
"max_steps": 3695,
"num_input_tokens_seen": 734635,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}