{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "global_step": 56860,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 1.9824129440731624e-05,
      "loss": 0.5315,
      "step": 500
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.9648258881463246e-05,
      "loss": 0.2968,
      "step": 1000
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.9472388322194867e-05,
      "loss": 0.2756,
      "step": 1500
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.929651776292649e-05,
      "loss": 0.2673,
      "step": 2000
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.9120647203658108e-05,
      "loss": 0.2521,
      "step": 2500
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.894477664438973e-05,
      "loss": 0.2405,
      "step": 3000
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8768906085121352e-05,
      "loss": 0.2264,
      "step": 3500
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.8593035525852974e-05,
      "loss": 0.2264,
      "step": 4000
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.8417164966584596e-05,
      "loss": 0.2231,
      "step": 4500
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.8241294407316218e-05,
      "loss": 0.6134,
      "step": 5000
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.806542384804784e-05,
      "loss": 0.6581,
      "step": 5500
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.788955328877946e-05,
      "loss": 0.6592,
      "step": 6000
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.771368272951108e-05,
      "loss": 0.6561,
      "step": 6500
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.7537812170242702e-05,
      "loss": 0.6599,
      "step": 7000
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.7361941610974324e-05,
      "loss": 0.6569,
      "step": 7500
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.7186071051705946e-05,
      "loss": 0.6531,
      "step": 8000
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.7010200492437568e-05,
      "loss": 0.2212,
      "step": 8500
    },
    {
      "epoch": 1.58,
      "learning_rate": 1.683432993316919e-05,
      "loss": 0.4708,
      "step": 9000
    },
    {
      "epoch": 1.67,
      "learning_rate": 1.665845937390081e-05,
      "loss": 0.2605,
      "step": 9500
    },
    {
      "epoch": 1.76,
      "learning_rate": 1.6482588814632434e-05,
      "loss": 0.1987,
      "step": 10000
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.6306718255364052e-05,
      "loss": 0.1908,
      "step": 10500
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.6130847696095674e-05,
      "loss": 0.185,
      "step": 11000
    },
    {
      "epoch": 2.02,
      "learning_rate": 1.5954977136827296e-05,
      "loss": 0.1767,
      "step": 11500
    },
    {
      "epoch": 2.11,
      "learning_rate": 1.5779106577558918e-05,
      "loss": 0.1551,
      "step": 12000
    },
    {
      "epoch": 2.2,
      "learning_rate": 1.560323601829054e-05,
      "loss": 0.1476,
      "step": 12500
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.5427365459022162e-05,
      "loss": 0.1449,
      "step": 13000
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.5251494899753782e-05,
      "loss": 0.1566,
      "step": 13500
    },
    {
      "epoch": 2.46,
      "learning_rate": 1.5075624340485404e-05,
      "loss": 0.1586,
      "step": 14000
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.4899753781217026e-05,
      "loss": 0.1487,
      "step": 14500
    },
    {
      "epoch": 2.64,
      "learning_rate": 1.4723883221948648e-05,
      "loss": 0.1493,
      "step": 15000
    },
    {
      "epoch": 2.73,
      "learning_rate": 1.4548012662680268e-05,
      "loss": 0.1547,
      "step": 15500
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.437214210341189e-05,
      "loss": 0.1528,
      "step": 16000
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.4196271544143512e-05,
      "loss": 0.1447,
      "step": 16500
    },
    {
      "epoch": 2.99,
      "learning_rate": 1.4020400984875134e-05,
      "loss": 0.1562,
      "step": 17000
    },
    {
      "epoch": 3.08,
      "learning_rate": 1.3844530425606754e-05,
      "loss": 0.1022,
      "step": 17500
    },
    {
      "epoch": 3.17,
      "learning_rate": 1.3668659866338376e-05,
      "loss": 0.099,
      "step": 18000
    },
    {
      "epoch": 3.25,
      "learning_rate": 1.3492789307069998e-05,
      "loss": 0.0971,
      "step": 18500
    },
    {
      "epoch": 3.34,
      "learning_rate": 1.331691874780162e-05,
      "loss": 0.1046,
      "step": 19000
    },
    {
      "epoch": 3.43,
      "learning_rate": 1.314104818853324e-05,
      "loss": 0.1083,
      "step": 19500
    },
    {
      "epoch": 3.52,
      "learning_rate": 1.2965177629264862e-05,
      "loss": 0.0998,
      "step": 20000
    },
    {
      "epoch": 3.61,
      "learning_rate": 1.2789307069996484e-05,
      "loss": 0.1049,
      "step": 20500
    },
    {
      "epoch": 3.69,
      "learning_rate": 1.2613436510728106e-05,
      "loss": 0.1052,
      "step": 21000
    },
    {
      "epoch": 3.78,
      "learning_rate": 1.2437565951459726e-05,
      "loss": 0.108,
      "step": 21500
    },
    {
      "epoch": 3.87,
      "learning_rate": 1.2261695392191348e-05,
      "loss": 0.0991,
      "step": 22000
    },
    {
      "epoch": 3.96,
      "learning_rate": 1.208582483292297e-05,
      "loss": 0.1024,
      "step": 22500
    },
    {
      "epoch": 4.05,
      "learning_rate": 1.1909954273654592e-05,
      "loss": 0.0822,
      "step": 23000
    },
    {
      "epoch": 4.13,
      "learning_rate": 1.1734083714386212e-05,
      "loss": 0.069,
      "step": 23500
    },
    {
      "epoch": 4.22,
      "learning_rate": 1.1558213155117834e-05,
      "loss": 0.0686,
      "step": 24000
    },
    {
      "epoch": 4.31,
      "learning_rate": 1.1382342595849456e-05,
      "loss": 0.0709,
      "step": 24500
    },
    {
      "epoch": 4.4,
      "learning_rate": 1.1206472036581078e-05,
      "loss": 0.0747,
      "step": 25000
    },
    {
      "epoch": 4.48,
      "learning_rate": 1.1030601477312698e-05,
      "loss": 0.0658,
      "step": 25500
    },
    {
      "epoch": 4.57,
      "learning_rate": 1.085473091804432e-05,
      "loss": 0.0732,
      "step": 26000
    },
    {
      "epoch": 4.66,
      "learning_rate": 1.0678860358775942e-05,
      "loss": 0.0715,
      "step": 26500
    },
    {
      "epoch": 4.75,
      "learning_rate": 1.0502989799507564e-05,
      "loss": 0.0778,
      "step": 27000
    },
    {
      "epoch": 4.84,
      "learning_rate": 1.0327119240239184e-05,
      "loss": 0.0724,
      "step": 27500
    },
    {
      "epoch": 4.92,
      "learning_rate": 1.0151248680970806e-05,
      "loss": 0.0722,
      "step": 28000
    },
    {
      "epoch": 5.01,
      "learning_rate": 9.975378121702428e-06,
      "loss": 0.0691,
      "step": 28500
    },
    {
      "epoch": 5.1,
      "learning_rate": 9.79950756243405e-06,
      "loss": 0.0485,
      "step": 29000
    },
    {
      "epoch": 5.19,
      "learning_rate": 9.62363700316567e-06,
      "loss": 0.0503,
      "step": 29500
    },
    {
      "epoch": 5.28,
      "learning_rate": 9.447766443897292e-06,
      "loss": 0.0536,
      "step": 30000
    },
    {
      "epoch": 5.36,
      "learning_rate": 9.271895884628914e-06,
      "loss": 0.0503,
      "step": 30500
    },
    {
      "epoch": 5.45,
      "learning_rate": 9.096025325360536e-06,
      "loss": 0.0452,
      "step": 31000
    },
    {
      "epoch": 5.54,
      "learning_rate": 8.920154766092157e-06,
      "loss": 0.0473,
      "step": 31500
    },
    {
      "epoch": 5.63,
      "learning_rate": 8.744284206823778e-06,
      "loss": 0.0488,
      "step": 32000
    },
    {
      "epoch": 5.72,
      "learning_rate": 8.5684136475554e-06,
      "loss": 0.0507,
      "step": 32500
    },
    {
      "epoch": 5.8,
      "learning_rate": 8.392543088287022e-06,
      "loss": 0.0467,
      "step": 33000
    },
    {
      "epoch": 5.89,
      "learning_rate": 8.216672529018643e-06,
      "loss": 0.0556,
      "step": 33500
    },
    {
      "epoch": 5.98,
      "learning_rate": 8.040801969750265e-06,
      "loss": 0.0481,
      "step": 34000
    },
    {
      "epoch": 6.07,
      "learning_rate": 7.864931410481886e-06,
      "loss": 0.0367,
      "step": 34500
    },
    {
      "epoch": 6.16,
      "learning_rate": 7.689060851213508e-06,
      "loss": 0.0329,
      "step": 35000
    },
    {
      "epoch": 6.24,
      "learning_rate": 7.5131902919451295e-06,
      "loss": 0.0294,
      "step": 35500
    },
    {
      "epoch": 6.33,
      "learning_rate": 7.3373197326767506e-06,
      "loss": 0.032,
      "step": 36000
    },
    {
      "epoch": 6.42,
      "learning_rate": 7.1614491734083725e-06,
      "loss": 0.0331,
      "step": 36500
    },
    {
      "epoch": 6.51,
      "learning_rate": 6.985578614139994e-06,
      "loss": 0.0274,
      "step": 37000
    },
    {
      "epoch": 6.6,
      "learning_rate": 6.8097080548716155e-06,
      "loss": 0.0385,
      "step": 37500
    },
    {
      "epoch": 6.68,
      "learning_rate": 6.633837495603237e-06,
      "loss": 0.0367,
      "step": 38000
    },
    {
      "epoch": 6.77,
      "learning_rate": 6.4579669363348586e-06,
      "loss": 0.0447,
      "step": 38500
    },
    {
      "epoch": 6.86,
      "learning_rate": 6.28209637706648e-06,
      "loss": 0.0433,
      "step": 39000
    },
    {
      "epoch": 6.95,
      "learning_rate": 6.106225817798102e-06,
      "loss": 0.034,
      "step": 39500
    },
    {
      "epoch": 7.03,
      "learning_rate": 5.930355258529723e-06,
      "loss": 0.0311,
      "step": 40000
    },
    {
      "epoch": 7.12,
      "learning_rate": 5.754484699261345e-06,
      "loss": 0.0189,
      "step": 40500
    },
    {
      "epoch": 7.21,
      "learning_rate": 5.578614139992966e-06,
      "loss": 0.02,
      "step": 41000
    },
    {
      "epoch": 7.3,
      "learning_rate": 5.402743580724588e-06,
      "loss": 0.0218,
      "step": 41500
    },
    {
      "epoch": 7.39,
      "learning_rate": 5.226873021456209e-06,
      "loss": 0.0217,
      "step": 42000
    },
    {
      "epoch": 7.47,
      "learning_rate": 5.051002462187831e-06,
      "loss": 0.0282,
      "step": 42500
    },
    {
      "epoch": 7.56,
      "learning_rate": 4.875131902919452e-06,
      "loss": 0.0267,
      "step": 43000
    },
    {
      "epoch": 7.65,
      "learning_rate": 4.699261343651073e-06,
      "loss": 0.026,
      "step": 43500
    },
    {
      "epoch": 7.74,
      "learning_rate": 4.523390784382695e-06,
      "loss": 0.0217,
      "step": 44000
    },
    {
      "epoch": 7.83,
      "learning_rate": 4.347520225114316e-06,
      "loss": 0.0194,
      "step": 44500
    },
    {
      "epoch": 7.91,
      "learning_rate": 4.171649665845938e-06,
      "loss": 0.026,
      "step": 45000
    },
    {
      "epoch": 8.0,
      "learning_rate": 3.995779106577559e-06,
      "loss": 0.0274,
      "step": 45500
    },
    {
      "epoch": 8.09,
      "learning_rate": 3.819908547309181e-06,
      "loss": 0.0107,
      "step": 46000
    },
    {
      "epoch": 8.18,
      "learning_rate": 3.6440379880408023e-06,
      "loss": 0.0103,
      "step": 46500
    },
    {
      "epoch": 8.27,
      "learning_rate": 3.468167428772424e-06,
      "loss": 0.0172,
      "step": 47000
    },
    {
      "epoch": 8.35,
      "learning_rate": 3.2922968695040454e-06,
      "loss": 0.0129,
      "step": 47500
    },
    {
      "epoch": 8.44,
      "learning_rate": 3.116426310235667e-06,
      "loss": 0.017,
      "step": 48000
    },
    {
      "epoch": 8.53,
      "learning_rate": 2.9405557509672884e-06,
      "loss": 0.0204,
      "step": 48500
    },
    {
      "epoch": 8.62,
      "learning_rate": 2.76468519169891e-06,
      "loss": 0.0128,
      "step": 49000
    },
    {
      "epoch": 8.71,
      "learning_rate": 2.5888146324305314e-06,
      "loss": 0.0213,
      "step": 49500
    },
    {
      "epoch": 8.79,
      "learning_rate": 2.412944073162153e-06,
      "loss": 0.0125,
      "step": 50000
    },
    {
      "epoch": 8.88,
      "learning_rate": 2.2370735138937744e-06,
      "loss": 0.0128,
      "step": 50500
    },
    {
      "epoch": 8.97,
      "learning_rate": 2.061202954625396e-06,
      "loss": 0.0182,
      "step": 51000
    },
    {
      "epoch": 9.06,
      "learning_rate": 1.8853323953570175e-06,
      "loss": 0.0116,
      "step": 51500
    },
    {
      "epoch": 9.15,
      "learning_rate": 1.709461836088639e-06,
      "loss": 0.0081,
      "step": 52000
    },
    {
      "epoch": 9.23,
      "learning_rate": 1.5335912768202605e-06,
      "loss": 0.0084,
      "step": 52500
    },
    {
      "epoch": 9.32,
      "learning_rate": 1.357720717551882e-06,
      "loss": 0.0076,
      "step": 53000
    },
    {
      "epoch": 9.41,
      "learning_rate": 1.1818501582835035e-06,
      "loss": 0.0089,
      "step": 53500
    },
    {
      "epoch": 9.5,
      "learning_rate": 1.005979599015125e-06,
      "loss": 0.0067,
      "step": 54000
    },
    {
      "epoch": 9.58,
      "learning_rate": 8.301090397467465e-07,
      "loss": 0.0066,
      "step": 54500
    },
    {
      "epoch": 9.67,
      "learning_rate": 6.542384804783681e-07,
      "loss": 0.0097,
      "step": 55000
    },
    {
      "epoch": 9.76,
      "learning_rate": 4.783679212099895e-07,
      "loss": 0.0099,
      "step": 55500
    },
    {
      "epoch": 9.85,
      "learning_rate": 3.02497361941611e-07,
      "loss": 0.0059,
      "step": 56000
    },
    {
      "epoch": 9.94,
      "learning_rate": 1.2662680267323252e-07,
      "loss": 0.0075,
      "step": 56500
    },
    {
      "epoch": 10.0,
      "step": 56860,
      "total_flos": 9.365103496606515e+17,
      "train_runtime": 69350.928,
      "train_samples_per_second": 52.464,
      "train_steps_per_second": 0.82
    }
  ],
  "max_steps": 56860,
  "num_train_epochs": 10,
  "total_flos": 9.365103496606515e+17,
  "trial_name": null,
  "trial_params": null
}