| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.02706359945872801, | |
| "eval_steps": 500, | |
| "global_step": 100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5125, | |
| "completions/max_length": 438.6, | |
| "completions/max_terminated_length": 181.6, | |
| "completions/mean_length": 422.075, | |
| "completions/mean_terminated_length": 165.6982177734375, | |
| "completions/min_length": 406.4, | |
| "completions/min_terminated_length": 150.4, | |
| "entropy": 0.45910371616482737, | |
| "epoch": 0.0027063599458728013, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.09368716925382614, | |
| "learning_rate": 9.975642760487146e-06, | |
| "loss": 0.0087, | |
| "num_tokens": 78806.0, | |
| "reward": 0.19168390333652496, | |
| "reward_std": 0.042040593549609186, | |
| "rewards/accuracy_reward/mean": 0.19168390333652496, | |
| "rewards/accuracy_reward/std": 0.04204059485346079, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.45, | |
| "completions/max_length": 322.8, | |
| "completions/max_terminated_length": 162.1, | |
| "completions/mean_length": 313.925, | |
| "completions/mean_terminated_length": 152.84583435058593, | |
| "completions/min_length": 298.5, | |
| "completions/min_terminated_length": 144.9, | |
| "entropy": 0.4505070824176073, | |
| "epoch": 0.005412719891745603, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.047858916223049164, | |
| "learning_rate": 9.948579161028418e-06, | |
| "loss": -0.0106, | |
| "num_tokens": 143360.0, | |
| "reward": 0.17426907550543547, | |
| "reward_std": 0.05201733000576496, | |
| "rewards/accuracy_reward/mean": 0.17426907550543547, | |
| "rewards/accuracy_reward/std": 0.05201732954010367, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2875, | |
| "completions/max_length": 304.4, | |
| "completions/max_terminated_length": 197.6, | |
| "completions/mean_length": 278.7875, | |
| "completions/mean_terminated_length": 166.95833435058594, | |
| "completions/min_length": 248.0, | |
| "completions/min_terminated_length": 145.6, | |
| "entropy": 0.6422165723517537, | |
| "epoch": 0.008119079837618403, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 0.04068256542086601, | |
| "learning_rate": 9.92151556156969e-06, | |
| "loss": 0.0031, | |
| "num_tokens": 210607.0, | |
| "reward": 0.2006674014031887, | |
| "reward_std": 0.07132247723639011, | |
| "rewards/accuracy_reward/mean": 0.2006674014031887, | |
| "rewards/accuracy_reward/std": 0.0713224794715643, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5125, | |
| "completions/max_length": 363.5, | |
| "completions/max_terminated_length": 87.0, | |
| "completions/mean_length": 335.375, | |
| "completions/mean_terminated_length": 76.44464416503907, | |
| "completions/min_length": 324.8, | |
| "completions/min_terminated_length": 68.8, | |
| "entropy": 0.46362581551074983, | |
| "epoch": 0.010825439783491205, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.04135835915803909, | |
| "learning_rate": 9.894451962110961e-06, | |
| "loss": 0.0425, | |
| "num_tokens": 286613.0, | |
| "reward": 0.16309105940163135, | |
| "reward_std": 0.06764648640528322, | |
| "rewards/accuracy_reward/mean": 0.16309105940163135, | |
| "rewards/accuracy_reward/std": 0.06764648780226708, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 345.9, | |
| "completions/max_terminated_length": 137.4, | |
| "completions/mean_length": 341.675, | |
| "completions/mean_terminated_length": 133.05, | |
| "completions/min_length": 332.6, | |
| "completions/min_terminated_length": 127.8, | |
| "entropy": 0.5505448803305626, | |
| "epoch": 0.013531799729364006, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 0.05425691232085228, | |
| "learning_rate": 9.867388362652234e-06, | |
| "loss": 0.0032, | |
| "num_tokens": 372011.0, | |
| "reward": 0.18142173625528812, | |
| "reward_std": 0.046853833552449944, | |
| "rewards/accuracy_reward/mean": 0.18142173625528812, | |
| "rewards/accuracy_reward/std": 0.04685383513569832, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5375, | |
| "completions/max_length": 419.1, | |
| "completions/max_terminated_length": 162.8, | |
| "completions/mean_length": 400.8875, | |
| "completions/mean_terminated_length": 144.52000122070314, | |
| "completions/min_length": 384.8, | |
| "completions/min_terminated_length": 128.8, | |
| "entropy": 0.4113187978975475, | |
| "epoch": 0.016238159675236806, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 0.03674859553575516, | |
| "learning_rate": 9.840324763193504e-06, | |
| "loss": 0.0156, | |
| "num_tokens": 451810.0, | |
| "reward": 0.21405077129602432, | |
| "reward_std": 0.04568238444626331, | |
| "rewards/accuracy_reward/mean": 0.21405077129602432, | |
| "rewards/accuracy_reward/std": 0.0456823855638504, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5875, | |
| "completions/max_length": 387.3, | |
| "completions/max_terminated_length": 170.3, | |
| "completions/mean_length": 365.6, | |
| "completions/mean_terminated_length": 134.99667053222657, | |
| "completions/min_length": 293.5, | |
| "completions/min_terminated_length": 88.7, | |
| "entropy": 0.6454691726714372, | |
| "epoch": 0.018944519621109608, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.03896205499768257, | |
| "learning_rate": 9.813261163734777e-06, | |
| "loss": -0.0027, | |
| "num_tokens": 522034.0, | |
| "reward": 0.16922515165060759, | |
| "reward_std": 0.07712158742360771, | |
| "rewards/accuracy_reward/mean": 0.16922515165060759, | |
| "rewards/accuracy_reward/std": 0.07712158723734319, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3875, | |
| "completions/max_length": 379.3, | |
| "completions/max_terminated_length": 224.1, | |
| "completions/mean_length": 365.6125, | |
| "completions/mean_terminated_length": 210.74881591796876, | |
| "completions/min_length": 342.2, | |
| "completions/min_terminated_length": 188.6, | |
| "entropy": 0.31367158964276315, | |
| "epoch": 0.02165087956698241, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.05739288777112961, | |
| "learning_rate": 9.78619756427605e-06, | |
| "loss": -0.0015, | |
| "num_tokens": 602675.0, | |
| "reward": 0.39022472202777864, | |
| "reward_std": 0.06431488357484341, | |
| "rewards/accuracy_reward/mean": 0.39022472202777864, | |
| "rewards/accuracy_reward/std": 0.06431488748639821, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.175, | |
| "completions/max_length": 275.6, | |
| "completions/max_terminated_length": 222.9, | |
| "completions/mean_length": 259.5, | |
| "completions/mean_terminated_length": 206.6125, | |
| "completions/min_length": 241.8, | |
| "completions/min_terminated_length": 190.6, | |
| "entropy": 0.3430396350100636, | |
| "epoch": 0.02435723951285521, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.01871815323829651, | |
| "learning_rate": 9.759133964817322e-06, | |
| "loss": 0.0226, | |
| "num_tokens": 664643.0, | |
| "reward": 0.34078182056546213, | |
| "reward_std": 0.07903781468048691, | |
| "rewards/accuracy_reward/mean": 0.34078182056546213, | |
| "rewards/accuracy_reward/std": 0.07903781542554497, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5875, | |
| "completions/max_length": 413.8, | |
| "completions/max_terminated_length": 148.1, | |
| "completions/mean_length": 403.2, | |
| "completions/mean_terminated_length": 138.7125, | |
| "completions/min_length": 384.9, | |
| "completions/min_terminated_length": 128.9, | |
| "entropy": 0.4610064772889018, | |
| "epoch": 0.02706359945872801, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.04077404364943504, | |
| "learning_rate": 9.732070365358594e-06, | |
| "loss": 0.0033, | |
| "num_tokens": 734635.0, | |
| "reward": 0.18911832235753537, | |
| "reward_std": 0.05935304025188089, | |
| "rewards/accuracy_reward/mean": 0.18911832235753537, | |
| "rewards/accuracy_reward/std": 0.05935304341837764, | |
| "step": 100 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3695, | |
| "num_input_tokens_seen": 734635, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |