ocr_rl / trainer_state.json

Upload folder using huggingface_hub

7c7e92c verified 4 months ago

10.5 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.02706359945872801,
	"eval_steps": 500,
	"global_step": 100,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.5125,
	"completions/max_length": 438.6,
	"completions/max_terminated_length": 181.6,
	"completions/mean_length": 422.075,
	"completions/mean_terminated_length": 165.6982177734375,
	"completions/min_length": 406.4,
	"completions/min_terminated_length": 150.4,
	"entropy": 0.45910371616482737,
	"epoch": 0.0027063599458728013,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.09368716925382614,
	"learning_rate": 9.975642760487146e-06,
	"loss": 0.0087,
	"num_tokens": 78806.0,
	"reward": 0.19168390333652496,
	"reward_std": 0.042040593549609186,
	"rewards/accuracy_reward/mean": 0.19168390333652496,
	"rewards/accuracy_reward/std": 0.04204059485346079,
	"step": 10
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.45,
	"completions/max_length": 322.8,
	"completions/max_terminated_length": 162.1,
	"completions/mean_length": 313.925,
	"completions/mean_terminated_length": 152.84583435058593,
	"completions/min_length": 298.5,
	"completions/min_terminated_length": 144.9,
	"entropy": 0.4505070824176073,
	"epoch": 0.005412719891745603,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.047858916223049164,
	"learning_rate": 9.948579161028418e-06,
	"loss": -0.0106,
	"num_tokens": 143360.0,
	"reward": 0.17426907550543547,
	"reward_std": 0.05201733000576496,
	"rewards/accuracy_reward/mean": 0.17426907550543547,
	"rewards/accuracy_reward/std": 0.05201732954010367,
	"step": 20
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.2875,
	"completions/max_length": 304.4,
	"completions/max_terminated_length": 197.6,
	"completions/mean_length": 278.7875,
	"completions/mean_terminated_length": 166.95833435058594,
	"completions/min_length": 248.0,
	"completions/min_terminated_length": 145.6,
	"entropy": 0.6422165723517537,
	"epoch": 0.008119079837618403,
	"frac_reward_zero_std": 0.1,
	"grad_norm": 0.04068256542086601,
	"learning_rate": 9.92151556156969e-06,
	"loss": 0.0031,
	"num_tokens": 210607.0,
	"reward": 0.2006674014031887,
	"reward_std": 0.07132247723639011,
	"rewards/accuracy_reward/mean": 0.2006674014031887,
	"rewards/accuracy_reward/std": 0.0713224794715643,
	"step": 30
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.5125,
	"completions/max_length": 363.5,
	"completions/max_terminated_length": 87.0,
	"completions/mean_length": 335.375,
	"completions/mean_terminated_length": 76.44464416503907,
	"completions/min_length": 324.8,
	"completions/min_terminated_length": 68.8,
	"entropy": 0.46362581551074983,
	"epoch": 0.010825439783491205,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.04135835915803909,
	"learning_rate": 9.894451962110961e-06,
	"loss": 0.0425,
	"num_tokens": 286613.0,
	"reward": 0.16309105940163135,
	"reward_std": 0.06764648640528322,
	"rewards/accuracy_reward/mean": 0.16309105940163135,
	"rewards/accuracy_reward/std": 0.06764648780226708,
	"step": 40
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.5625,
	"completions/max_length": 345.9,
	"completions/max_terminated_length": 137.4,
	"completions/mean_length": 341.675,
	"completions/mean_terminated_length": 133.05,
	"completions/min_length": 332.6,
	"completions/min_terminated_length": 127.8,
	"entropy": 0.5505448803305626,
	"epoch": 0.013531799729364006,
	"frac_reward_zero_std": 0.1,
	"grad_norm": 0.05425691232085228,
	"learning_rate": 9.867388362652234e-06,
	"loss": 0.0032,
	"num_tokens": 372011.0,
	"reward": 0.18142173625528812,
	"reward_std": 0.046853833552449944,
	"rewards/accuracy_reward/mean": 0.18142173625528812,
	"rewards/accuracy_reward/std": 0.04685383513569832,
	"step": 50
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.5375,
	"completions/max_length": 419.1,
	"completions/max_terminated_length": 162.8,
	"completions/mean_length": 400.8875,
	"completions/mean_terminated_length": 144.52000122070314,
	"completions/min_length": 384.8,
	"completions/min_terminated_length": 128.8,
	"entropy": 0.4113187978975475,
	"epoch": 0.016238159675236806,
	"frac_reward_zero_std": 0.1,
	"grad_norm": 0.03674859553575516,
	"learning_rate": 9.840324763193504e-06,
	"loss": 0.0156,
	"num_tokens": 451810.0,
	"reward": 0.21405077129602432,
	"reward_std": 0.04568238444626331,
	"rewards/accuracy_reward/mean": 0.21405077129602432,
	"rewards/accuracy_reward/std": 0.0456823855638504,
	"step": 60
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.5875,
	"completions/max_length": 387.3,
	"completions/max_terminated_length": 170.3,
	"completions/mean_length": 365.6,
	"completions/mean_terminated_length": 134.99667053222657,
	"completions/min_length": 293.5,
	"completions/min_terminated_length": 88.7,
	"entropy": 0.6454691726714372,
	"epoch": 0.018944519621109608,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.03896205499768257,
	"learning_rate": 9.813261163734777e-06,
	"loss": -0.0027,
	"num_tokens": 522034.0,
	"reward": 0.16922515165060759,
	"reward_std": 0.07712158742360771,
	"rewards/accuracy_reward/mean": 0.16922515165060759,
	"rewards/accuracy_reward/std": 0.07712158723734319,
	"step": 70
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.3875,
	"completions/max_length": 379.3,
	"completions/max_terminated_length": 224.1,
	"completions/mean_length": 365.6125,
	"completions/mean_terminated_length": 210.74881591796876,
	"completions/min_length": 342.2,
	"completions/min_terminated_length": 188.6,
	"entropy": 0.31367158964276315,
	"epoch": 0.02165087956698241,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.05739288777112961,
	"learning_rate": 9.78619756427605e-06,
	"loss": -0.0015,
	"num_tokens": 602675.0,
	"reward": 0.39022472202777864,
	"reward_std": 0.06431488357484341,
	"rewards/accuracy_reward/mean": 0.39022472202777864,
	"rewards/accuracy_reward/std": 0.06431488748639821,
	"step": 80
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.175,
	"completions/max_length": 275.6,
	"completions/max_terminated_length": 222.9,
	"completions/mean_length": 259.5,
	"completions/mean_terminated_length": 206.6125,
	"completions/min_length": 241.8,
	"completions/min_terminated_length": 190.6,
	"entropy": 0.3430396350100636,
	"epoch": 0.02435723951285521,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.01871815323829651,
	"learning_rate": 9.759133964817322e-06,
	"loss": 0.0226,
	"num_tokens": 664643.0,
	"reward": 0.34078182056546213,
	"reward_std": 0.07903781468048691,
	"rewards/accuracy_reward/mean": 0.34078182056546213,
	"rewards/accuracy_reward/std": 0.07903781542554497,
	"step": 90
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.5875,
	"completions/max_length": 413.8,
	"completions/max_terminated_length": 148.1,
	"completions/mean_length": 403.2,
	"completions/mean_terminated_length": 138.7125,
	"completions/min_length": 384.9,
	"completions/min_terminated_length": 128.9,
	"entropy": 0.4610064772889018,
	"epoch": 0.02706359945872801,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.04077404364943504,
	"learning_rate": 9.732070365358594e-06,
	"loss": 0.0033,
	"num_tokens": 734635.0,
	"reward": 0.18911832235753537,
	"reward_std": 0.05935304025188089,
	"rewards/accuracy_reward/mean": 0.18911832235753537,
	"rewards/accuracy_reward/std": 0.05935304341837764,
	"step": 100
	}
	],
	"logging_steps": 10,
	"max_steps": 3695,
	"num_input_tokens_seen": 734635,
	"num_train_epochs": 1,
	"save_steps": 10,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}